| Index: lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (revision 1709752) |
| +++ lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (working copy) |
| @@ -83,11 +83,11 @@ |
| } |
| |
| void advance(int min) throws IOException { |
| - score(null, min, min); |
| + score(orCollector, null, min, min); |
| } |
| |
| - void score(Bits acceptDocs, int min, int max) throws IOException { |
| - next = scorer.score(orCollector, acceptDocs, min, max); |
| + void score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { |
| + next = scorer.score(collector, acceptDocs, min, max); |
| } |
| } |
| |
| @@ -179,6 +179,9 @@ |
| if (minShouldMatch < 1 || minShouldMatch > scorers.size()) { |
| throw new IllegalArgumentException("minShouldMatch should be within 1..num_scorers. Got " + minShouldMatch); |
| } |
| + if (scorers.size() <= 1) { |
| + throw new IllegalArgumentException("This scorer can only be used with two scorers or more, got " + scorers.size()); |
| + } |
| for (int i = 0; i < buckets.length; i++) { |
| buckets[i] = new Bucket(); |
| } |
| @@ -237,12 +240,12 @@ |
| } |
| } |
| |
| - private void scoreWindow(LeafCollector collector, Bits acceptDocs, int base, int min, int max, |
| - BulkScorerAndDoc[] scorers, int numScorers) throws IOException { |
| + private void scoreWindowIntoBitSetAndReplay(LeafCollector collector, Bits acceptDocs, |
| + int base, int min, int max, BulkScorerAndDoc[] scorers, int numScorers) throws IOException { |
| for (int i = 0; i < numScorers; ++i) { |
| final BulkScorerAndDoc scorer = scorers[i]; |
| assert scorer.next < max; |
| - scorer.score(acceptDocs, min, max); |
| + scorer.score(orCollector, acceptDocs, min, max); |
| } |
| |
| scoreMatches(collector, base); |
| @@ -270,14 +273,7 @@ |
| return headTop; |
| } |
| |
| - private void scoreWindow(LeafCollector collector, Bits acceptDocs, int windowBase, int windowMin, int windowMax) throws IOException { |
| - // Fill 'leads' with all scorers from 'head' that are in the right window |
| - leads[0] = head.pop(); |
| - int maxFreq = 1; |
| - while (head.size() > 0 && head.top().next < windowMax) { |
| - leads[maxFreq++] = head.pop(); |
| - } |
| - |
| + private void scoreWindowMultipleScorers(LeafCollector collector, Bits acceptDocs, int windowBase, int windowMin, int windowMax, int maxFreq) throws IOException { |
| while (maxFreq < minShouldMatch && maxFreq + tail.size() >= minShouldMatch) { |
| // a match is still possible |
| final BulkScorerAndDoc candidate = tail.pop(); |
| @@ -296,7 +292,7 @@ |
| } |
| tail.clear(); |
| |
| - scoreWindow(collector, acceptDocs, windowBase, windowMin, windowMax, leads, maxFreq); |
| + scoreWindowIntoBitSetAndReplay(collector, acceptDocs, windowBase, windowMin, windowMax, leads, maxFreq); |
| } |
| |
| // Push back scorers into head and tail |
| @@ -308,21 +304,64 @@ |
| } |
| } |
| |
| + // Scores a window that only 'bulkScorer' can match by feeding 'singleClauseCollector' directly. |
| + private void scoreWindowSingleScorer(BulkScorerAndDoc bulkScorer, LeafCollector collector, |
| + LeafCollector singleClauseCollector, Bits acceptDocs, int windowMin, int windowMax, int max) throws IOException { |
| + assert tail.size() == 0; |
| + final int nextWindowBase = head.top().next & ~MASK; |
| + final int end = Math.max(windowMax, Math.min(max, nextWindowBase)); |
| + bulkScorer.score(singleClauseCollector, acceptDocs, windowMin, end); |
| + // reset the scorer that should be used for the general case; this must go through the unwrapped |
| + // collector, otherwise fakeScorer would itself get wrapped by singleClauseCollector.setScorer |
| + collector.setScorer(fakeScorer); |
| + } |
| + |
| + // Scores the window that contains top.next; the caller uses the returned scorer as its next 'top'. |
| + private BulkScorerAndDoc scoreWindow(BulkScorerAndDoc top, LeafCollector collector, |
| + LeafCollector singleClauseCollector, Bits acceptDocs, int min, int max) throws IOException { |
| + final int windowBase = top.next & ~MASK; // find the window that the next match belongs to |
| + final int windowMin = Math.max(min, windowBase); |
| + final int windowMax = Math.min(max, windowBase + SIZE); |
| + // Fill 'leads' with all scorers from 'head' that are in the right window |
| + leads[0] = head.pop(); |
| + int maxFreq = 1; |
| + while (head.size() > 0 && head.top().next < windowMax) { |
| + leads[maxFreq++] = head.pop(); |
| + } |
| + |
| + if (minShouldMatch == 1 && maxFreq == 1) { |
| + // special case: only one scorer can match in the current window, |
| + // we can collect directly |
| + final BulkScorerAndDoc bulkScorer = leads[0]; |
| + scoreWindowSingleScorer(bulkScorer, collector, singleClauseCollector, acceptDocs, windowMin, windowMax, max); |
| + return head.add(bulkScorer); |
| + } else { |
| + // general case, collect through a bit set first and then replay |
| + scoreWindowMultipleScorers(collector, acceptDocs, windowBase, windowMin, windowMax, maxFreq); |
| + return head.top(); |
| + } |
| + } |
| + |
| @Override |
| public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { |
| fakeScorer.doc = -1; |
| collector.setScorer(fakeScorer); |
| |
| + final LeafCollector singleClauseCollector; |
| + if (coordFactors[1] == 1f) { |
| + singleClauseCollector = collector; |
| + } else { |
| + singleClauseCollector = new FilterLeafCollector(collector) { |
| + @Override |
| + public void setScorer(Scorer scorer) throws IOException { |
| + super.setScorer(new BooleanTopLevelScorers.BoostedScorer(scorer, coordFactors[1])); |
| + } |
| + }; |
| + } |
| + |
| BulkScorerAndDoc top = advance(min); |
| while (top.next < max) { |
| - |
| - final int windowBase = top.next & ~MASK; // find the window that the next match belongs to |
| - final int windowMin = Math.max(min, windowBase); |
| - final int windowMax = Math.min(max, windowBase + SIZE); |
| - |
| - // general case |
| - scoreWindow(collector, acceptDocs, windowBase, windowMin, windowMax); |
| - top = head.top(); |
| + top = scoreWindow(top, collector, singleClauseCollector, acceptDocs, min, max); |
| } |
| |
| return top.next; |
| Index: lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java (revision 1709752) |
| +++ lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java (working copy) |
| @@ -22,6 +22,8 @@ |
| import java.util.Collection; |
| import java.util.Collections; |
| |
| +import org.apache.lucene.util.Bits; |
| + |
| /** Internal document-at-a-time scorers used to deal with stupid coord() computation */ |
| class BooleanTopLevelScorers { |
| |
| @@ -48,7 +50,39 @@ |
| return Collections.singleton(new ChildScorer(in, "BOOSTED")); |
| } |
| } |
| - |
| + |
| + /** |
| + * Bulk scorer for the case where a query has more than one scorer but only one of them is |
| + * non-null on a segment: scorers given to the collector get wrapped in a {@link BoostedScorer}. |
| + */ |
| + static class BoostedBulkScorer extends BulkScorer { |
| + |
| + final BulkScorer in; |
| + final float boost; |
| + |
| + BoostedBulkScorer(BulkScorer scorer, float boost) { |
| + this.in = scorer; |
| + this.boost = boost; |
| + } |
| + |
| + @Override |
| + public long cost() { |
| + return in.cost(); |
| + } |
| + |
| + @Override |
| + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { |
| + // delegate the actual scoring, intercepting setScorer so that the collector sees a BoostedScorer |
| + return in.score(new FilterLeafCollector(collector) { |
| + @Override |
| + public void setScorer(Scorer scorer) throws IOException { |
| + super.setScorer(new BoostedScorer(scorer, boost)); |
| + } |
| + }, acceptDocs, min, max); |
| + } |
| + |
| + } |
| + |
| /** |
| * Used when there are both mandatory and optional clauses, but minShouldMatch |
| * dictates that some of the optional clauses must match. The query is a conjunction, |
| Index: lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java (revision 1709752) |
| +++ lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java (working copy) |
| @@ -190,7 +190,7 @@ |
| /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} |
| * cannot be used. */ |
| // pkg-private for forcing use of BooleanScorer in tests |
| - BooleanScorer booleanScorer(LeafReaderContext context) throws IOException { |
| + BulkScorer booleanScorer(LeafReaderContext context) throws IOException { |
| List<BulkScorer> optional = new ArrayList<BulkScorer>(); |
| Iterator<BooleanClause> cIter = query.iterator(); |
| for (Weight w : weights) { |
| @@ -222,12 +222,21 @@ |
| return null; |
| } |
| |
| + if (optional.size() == 1) { |
| + BulkScorer opt = optional.get(0); |
| + if (!disableCoord && maxCoord > 1) { |
| + return new BooleanTopLevelScorers.BoostedBulkScorer(opt, coord(1, maxCoord)); |
| + } else { |
| + return opt; |
| + } |
| + } |
| + |
| return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, query.getMinimumNumberShouldMatch()), needsScores); |
| } |
| |
| @Override |
| public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { |
| - final BooleanScorer bulkScorer = booleanScorer(context); |
| + final BulkScorer bulkScorer = booleanScorer(context); |
| if (bulkScorer != null) { // BooleanScorer is applicable |
| // TODO: what is the right heuristic here? |
| final long costThreshold; |
| Index: lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (revision 1709752) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (working copy) |
| @@ -22,13 +22,19 @@ |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| +import org.apache.lucene.document.StringField; |
| +import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.BooleanClause.Occur; |
| +import org.apache.lucene.search.Weight.DefaultBulkScorer; |
| +import org.apache.lucene.search.similarities.ClassicSimilarity; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util.TestUtil; |
| |
| public class TestBooleanScorer extends LuceneTestCase { |
| private static final String FIELD = "category"; |
| @@ -141,4 +147,98 @@ |
| r.close(); |
| dir.close(); |
| } |
| + |
| + public void testOptimizeTopLevelClauseOrNull() throws IOException { |
| + // A disjunction with a single non-null clause scorer must be served by that clause's |
| + // bulk scorer (boosted when coords apply) rather than by a BooleanScorer |
| + Directory directory = newDirectory(); |
| + RandomIndexWriter writer = new RandomIndexWriter(random(), directory); |
| + Document document = new Document(); |
| + document.add(new StringField("foo", "bar", Store.NO)); |
| + writer.addDocument(document); |
| + IndexReader reader = writer.getReader(); |
| + IndexSearcher searcher = new IndexSearcher(reader); |
| + searcher.setQueryCache(null); // so that weights are not wrapped |
| + final LeafReaderContext leaf = reader.leaves().get(0); |
| + Query query = new BooleanQuery.Builder() |
| + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) // term present in the index |
| + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) // term absent from the index |
| + .build(); |
| + |
| + // scores not needed: expect the plain term bulk scorer |
| + BooleanWeight weight = (BooleanWeight) searcher.createNormalizedWeight(query, false); |
| + BulkScorer bulkScorer = weight.booleanScorer(leaf); |
| + assertTrue(bulkScorer instanceof DefaultBulkScorer); // term scorer |
| + |
| + // coords disabled: expect the plain term bulk scorer as well |
| + query = new BooleanQuery.Builder() |
| + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) // term present in the index |
| + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) // term absent from the index |
| + .setDisableCoord(true) |
| + .build(); |
| + weight = (BooleanWeight) searcher.createNormalizedWeight(query, true); |
| + bulkScorer = weight.booleanScorer(leaf); |
| + assertTrue(bulkScorer instanceof DefaultBulkScorer); // term scorer |
| + |
| + // coords enabled: expect the term bulk scorer wrapped in a BoostedBulkScorer |
| + searcher.setSimilarity(new ClassicSimilarity()); |
| + query = new BooleanQuery.Builder() |
| + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) // term present in the index |
| + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) // term absent from the index |
| + .build(); |
| + weight = (BooleanWeight) searcher.createNormalizedWeight(query, true); |
| + bulkScorer = weight.booleanScorer(leaf); |
| + assertTrue(bulkScorer instanceof BooleanTopLevelScorers.BoostedBulkScorer); |
| + |
| + writer.close(); |
| + reader.close(); |
| + directory.close(); |
| + } |
| + |
| + public void testSparseClauseOptimization() throws IOException { |
| + // Matching docs are separated by long runs of empty docs so that some windows have only |
| + // one scorer that can match; in such windows the scorer calls the collector directly |
| + Directory directory = newDirectory(); |
| + RandomIndexWriter writer = new RandomIndexWriter(random(), directory); |
| + Document emptyDoc = new Document(); |
| + for (int i = 0, n = random().nextInt(5000); i <= n; ++i) { |
| + writer.addDocument(emptyDoc); |
| + } |
| + StringField field = new StringField("field", "foo", Store.NO); |
| + Document matchingDoc = new Document(); |
| + matchingDoc.add(field); |
| + writer.addDocument(matchingDoc); |
| + for (int i = 0, n = TestUtil.nextInt(random(), 3000, 5000); i <= n; ++i) { |
| + writer.addDocument(emptyDoc); |
| + } |
| + field.setStringValue("bar"); |
| + writer.addDocument(matchingDoc); |
| + for (int i = 0, n = TestUtil.nextInt(random(), 3000, 5000); i <= n; ++i) { |
| + writer.addDocument(emptyDoc); |
| + } |
| + field.setStringValue("baz"); |
| + writer.addDocument(matchingDoc); |
| + for (int i = 0, n = TestUtil.nextInt(random(), 3000, 5000); i <= n; ++i) { |
| + writer.addDocument(emptyDoc); |
| + } |
| + if (random().nextBoolean()) { |
| + writer.forceMerge(1); |
| + } |
| + IndexReader reader = writer.getReader(); |
| + IndexSearcher searcher = newSearcher(reader); |
| + |
| + BooleanQuery.Builder builder = new BooleanQuery.Builder(); |
| + for (String term : new String[] {"foo", "bar", "baz"}) { |
| + builder.add(new BoostQuery(new TermQuery(new Term("field", term)), 3), Occur.SHOULD); |
| + } |
| + builder.setDisableCoord(random().nextBoolean()); |
| + Query query = builder.build(); |
| + |
| + // duel BS1 vs. BS2 |
| + QueryUtils.check(random(), query, searcher); |
| + |
| + reader.close(); |
| + writer.close(); |
| + directory.close(); |
| + } |
| } |