| package org.apache.lucene.search; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.List; |
| |
| import org.apache.lucene.index.AtomicReaderContext; |
| import org.apache.lucene.index.DocsEnum; |
| import org.apache.lucene.search.BooleanQuery.BooleanWeight; |
| |
| /* Description from Doug Cutting (excerpted from |
| * LUCENE-1483): |
| * |
| * BooleanScorer uses an array to score windows of |
| * 2K docs. So it scores docs 0-2K first, then docs 2K-4K, |
| * etc. For each window it iterates through all query terms |
| * and accumulates a score in table[doc%2K]. It also stores |
| * in the table a bitmask representing which terms |
| * contributed to the score. Non-zero scores are chained in |
| * a linked list. At the end of scoring each window it then |
| * iterates through the linked list and, if the bitmask |
| * matches the boolean constraints, collects a hit. For |
| * boolean queries with lots of frequent terms this can be |
| * much faster, since it does not need to update a priority |
| * queue for each posting, instead performing constant-time |
| * operations per posting. The only downside is that it |
| * results in hits being delivered out-of-order within the |
| * window, which means it cannot be nested within other |
| * scorers. But it works well as a top-level scorer. |
| * |
| * The new BooleanScorer2 implementation instead works by |
| * merging priority queues of postings, albeit with some |
| * clever tricks. For example, a pure conjunction (all terms |
| * required) does not require a priority queue. Instead it |
| * sorts the posting streams at the start, then repeatedly |
| * skips the first to the last. If the first ever equals |
| * the last, then there's a hit. When some terms are |
| * required and some terms are optional, the conjunction can |
| * be evaluated first, then the optional terms can all skip |
| * to the match and be added to the score. Thus the |
| * conjunction can reduce the number of priority queue |
| * updates for the optional terms. */ |
| |
| final class BooleanScorer extends BulkScorer { |
| |
| private static final class BooleanScorerCollector extends SimpleCollector { |
| private BucketTable bucketTable; |
| private int mask; |
| private Scorer scorer; |
| |
| public BooleanScorerCollector(int mask, BucketTable bucketTable) { |
| this.mask = mask; |
| this.bucketTable = bucketTable; |
| } |
| |
| @Override |
| public void collect(final int doc) throws IOException { |
| final BucketTable table = bucketTable; |
| final int i = doc & BucketTable.MASK; |
| final Bucket bucket = table.buckets[i]; |
| |
| if (bucket.doc != doc) { // invalid bucket |
| bucket.doc = doc; // set doc |
| bucket.score = scorer.score(); // initialize score |
| bucket.bits = mask; // initialize mask |
| bucket.coord = 1; // initialize coord |
| |
| bucket.next = table.first; // push onto valid list |
| table.first = bucket; |
| } else { // valid bucket |
| bucket.score += scorer.score(); // increment score |
| bucket.bits |= mask; // add bits in mask |
| bucket.coord++; // increment coord |
| } |
| } |
| |
| @Override |
| public void setScorer(Scorer scorer) { |
| this.scorer = scorer; |
| } |
| |
| @Override |
| public boolean acceptsDocsOutOfOrder() { |
| return true; |
| } |
| |
| } |
| |
| static final class Bucket { |
| int doc = -1; // tells if bucket is valid |
| double score; // incremental score |
| // TODO: break out bool anyProhibited, int |
| // numRequiredMatched; then we can remove 32 limit on |
| // required clauses |
| int bits; // used for bool constraints |
| int coord; // count of terms in score |
| Bucket next; // next valid bucket |
| } |
| |
| /** A simple hash table of document scores within a range. */ |
| static final class BucketTable { |
| public static final int SIZE = 1 << 11; |
| public static final int MASK = SIZE - 1; |
| |
| final Bucket[] buckets = new Bucket[SIZE]; |
| Bucket first = null; // head of valid list |
| |
| public BucketTable() { |
| // Pre-fill to save the lazy init when collecting |
| // each sub: |
| for(int idx=0;idx<SIZE;idx++) { |
| buckets[idx] = new Bucket(); |
| } |
| } |
| |
| public LeafCollector newCollector(int mask) { |
| return new BooleanScorerCollector(mask, this); |
| } |
| |
| public int size() { return SIZE; } |
| } |
| |
| static final class SubScorer { |
| public BulkScorer scorer; |
| // TODO: re-enable this if BQ ever sends us required clauses |
| //public boolean required = false; |
| public boolean prohibited; |
| public LeafCollector collector; |
| public SubScorer next; |
| public boolean more; |
| |
| public SubScorer(BulkScorer scorer, boolean required, boolean prohibited, |
| LeafCollector collector, SubScorer next) { |
| if (required) { |
| throw new IllegalArgumentException("this scorer cannot handle required=true"); |
| } |
| this.scorer = scorer; |
| this.more = true; |
| // TODO: re-enable this if BQ ever sends us required clauses |
| //this.required = required; |
| this.prohibited = prohibited; |
| this.collector = collector; |
| this.next = next; |
| } |
| } |
| |
| private SubScorer scorers = null; |
| private BucketTable bucketTable = new BucketTable(); |
| private final float[] coordFactors; |
| // TODO: re-enable this if BQ ever sends us required clauses |
| //private int requiredMask = 0; |
| private final int minNrShouldMatch; |
| private int end; |
| private Bucket current; |
| // Any time a prohibited clause matches we set bit 0: |
| private static final int PROHIBITED_MASK = 1; |
| |
| private final Weight weight; |
| |
| BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch, |
| List<BulkScorer> optionalScorers, List<BulkScorer> prohibitedScorers, int maxCoord) throws IOException { |
| this.minNrShouldMatch = minNrShouldMatch; |
| this.weight = weight; |
| |
| for (BulkScorer scorer : optionalScorers) { |
| scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers); |
| } |
| |
| for (BulkScorer scorer : prohibitedScorers) { |
| scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers); |
| } |
| |
| coordFactors = new float[optionalScorers.size() + 1]; |
| for (int i = 0; i < coordFactors.length; i++) { |
| coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord); |
| } |
| } |
| |
| @Override |
| public boolean score(LeafCollector collector, int max) throws IOException { |
| |
| boolean more; |
| Bucket tmp; |
| FakeScorer fs = new FakeScorer(); |
| |
| // The internal loop will set the score and doc before calling collect. |
| collector.setScorer(fs); |
| do { |
| bucketTable.first = null; |
| |
| while (current != null) { // more queued |
| |
| // check prohibited & required |
| if ((current.bits & PROHIBITED_MASK) == 0) { |
| |
| // TODO: re-enable this if BQ ever sends us required |
| // clauses |
| //&& (current.bits & requiredMask) == requiredMask) { |
| |
| // NOTE: Lucene always passes max = |
| // Integer.MAX_VALUE today, because we never embed |
| // a BooleanScorer inside another (even though |
| // that should work)... but in theory an outside |
| // app could pass a different max so we must check |
| // it: |
| if (current.doc >= max) { |
| tmp = current; |
| current = current.next; |
| tmp.next = bucketTable.first; |
| bucketTable.first = tmp; |
| continue; |
| } |
| |
| if (current.coord >= minNrShouldMatch) { |
| fs.score = (float) (current.score * coordFactors[current.coord]); |
| fs.doc = current.doc; |
| fs.freq = current.coord; |
| collector.collect(current.doc); |
| } |
| } |
| |
| current = current.next; // pop the queue |
| } |
| |
| if (bucketTable.first != null){ |
| current = bucketTable.first; |
| bucketTable.first = current.next; |
| return true; |
| } |
| |
| // refill the queue |
| more = false; |
| end += BucketTable.SIZE; |
| for (SubScorer sub = scorers; sub != null; sub = sub.next) { |
| if (sub.more) { |
| sub.more = sub.scorer.score(sub.collector, end); |
| more |= sub.more; |
| } |
| } |
| current = bucketTable.first; |
| |
| } while (current != null || more); |
| |
| return false; |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder buffer = new StringBuilder(); |
| buffer.append("boolean("); |
| for (SubScorer sub = scorers; sub != null; sub = sub.next) { |
| buffer.append(sub.scorer.toString()); |
| buffer.append(" "); |
| } |
| buffer.append(")"); |
| return buffer.toString(); |
| } |
| } |