| package org.apache.lucene.search; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.List; |
| |
| import org.apache.lucene.index.AtomicReaderContext; |
| import org.apache.lucene.search.BooleanQuery.BooleanWeight; |
| |
| /* Description from Doug Cutting (excerpted from |
| * LUCENE-1483): |
| * |
| * BooleanScorer uses an array to score windows of |
| * 2K docs. So it scores docs 0-2K first, then docs 2K-4K, |
| * etc. For each window it iterates through all query terms |
| * and accumulates a score in table[doc%2K]. It also stores |
| * in the table a bitmask representing which terms |
| * contributed to the score. Non-zero scores are chained in |
| * a linked list. At the end of scoring each window it then |
| * iterates through the linked list and, if the bitmask |
| * matches the boolean constraints, collects a hit. For |
| * boolean queries with lots of frequent terms this can be |
| * much faster, since it does not need to update a priority |
| * queue for each posting, instead performing constant-time |
| * operations per posting. The only downside is that it |
| * results in hits being delivered out-of-order within the |
| * window, which means it cannot be nested within other |
| * scorers. But it works well as a top-level scorer. |
| * |
| * The new BooleanScorer2 implementation instead works by |
| * merging priority queues of postings, albeit with some |
| * clever tricks. For example, a pure conjunction (all terms |
| * required) does not require a priority queue. Instead it |
| * sorts the posting streams at the start, then repeatedly |
| * skips the first to to the last. If the first ever equals |
| * the last, then there's a hit. When some terms are |
| * required and some terms are optional, the conjunction can |
| * be evaluated first, then the optional terms can all skip |
| * to the match and be added to the score. Thus the |
| * conjunction can reduce the number of priority queue |
| * updates for the optional terms. */ |
| |
| final class BooleanScorer extends Scorer { |
| |
| private static final class BooleanScorerCollector extends Collector { |
| private BucketTable bucketTable; |
| private int mask; |
| private Scorer scorer; |
| |
| public BooleanScorerCollector(int mask, BucketTable bucketTable) { |
| this.mask = mask; |
| this.bucketTable = bucketTable; |
| } |
| |
| @Override |
| public void collect(final int doc) throws IOException { |
| final BucketTable table = bucketTable; |
| final int i = doc & BucketTable.MASK; |
| final Bucket bucket = table.buckets[i]; |
| |
| if (bucket.doc != doc) { // invalid bucket |
| bucket.doc = doc; // set doc |
| bucket.score = scorer.score(); // initialize score |
| bucket.bits = mask; // initialize mask |
| bucket.coord = 1; // initialize coord |
| |
| bucket.next = table.first; // push onto valid list |
| table.first = bucket; |
| } else { // valid bucket |
| bucket.score += scorer.score(); // increment score |
| bucket.bits |= mask; // add bits in mask |
| bucket.coord++; // increment coord |
| } |
| } |
| |
| @Override |
| public void setNextReader(AtomicReaderContext context) { |
| // not needed by this implementation |
| } |
| |
| @Override |
| public void setScorer(Scorer scorer) { |
| this.scorer = scorer; |
| } |
| |
| @Override |
| public boolean acceptsDocsOutOfOrder() { |
| return true; |
| } |
| |
| } |
| |
| // An internal class which is used in score(Collector, int) for setting the |
| // current score. This is required since Collector exposes a setScorer method |
| // and implementations that need the score will call scorer.score(). |
| // Therefore the only methods that are implemented are score() and doc(). |
| private static final class BucketScorer extends Scorer { |
| |
| double score; |
| int doc = NO_MORE_DOCS; |
| int freq; |
| |
| public BucketScorer(Weight weight) { super(weight); } |
| |
| @Override |
| public int advance(int target) { return NO_MORE_DOCS; } |
| |
| @Override |
| public int docID() { return doc; } |
| |
| @Override |
| public int freq() { return freq; } |
| |
| @Override |
| public int nextDoc() { return NO_MORE_DOCS; } |
| |
| @Override |
| public float score() { return (float)score; } |
| |
| @Override |
| public long cost() { return 1; } |
| |
| } |
| |
| static final class Bucket { |
| int doc = -1; // tells if bucket is valid |
| double score; // incremental score |
| // TODO: break out bool anyProhibited, int |
| // numRequiredMatched; then we can remove 32 limit on |
| // required clauses |
| int bits; // used for bool constraints |
| int coord; // count of terms in score |
| Bucket next; // next valid bucket |
| } |
| |
| /** A simple hash table of document scores within a range. */ |
| static final class BucketTable { |
| public static final int SIZE = 1 << 11; |
| public static final int MASK = SIZE - 1; |
| |
| final Bucket[] buckets = new Bucket[SIZE]; |
| Bucket first = null; // head of valid list |
| |
| public BucketTable() { |
| // Pre-fill to save the lazy init when collecting |
| // each sub: |
| for(int idx=0;idx<SIZE;idx++) { |
| buckets[idx] = new Bucket(); |
| } |
| } |
| |
| public Collector newCollector(int mask) { |
| return new BooleanScorerCollector(mask, this); |
| } |
| |
| public int size() { return SIZE; } |
| } |
| |
| static final class SubScorer { |
| public Scorer scorer; |
| // TODO: re-enable this if BQ ever sends us required clauses |
| //public boolean required = false; |
| public boolean prohibited; |
| public Collector collector; |
| public SubScorer next; |
| |
| public SubScorer(Scorer scorer, boolean required, boolean prohibited, |
| Collector collector, SubScorer next) { |
| if (required) { |
| throw new IllegalArgumentException("this scorer cannot handle required=true"); |
| } |
| this.scorer = scorer; |
| // TODO: re-enable this if BQ ever sends us required clauses |
| //this.required = required; |
| this.prohibited = prohibited; |
| this.collector = collector; |
| this.next = next; |
| } |
| } |
| |
| private SubScorer scorers = null; |
| private BucketTable bucketTable = new BucketTable(); |
| private final float[] coordFactors; |
| // TODO: re-enable this if BQ ever sends us required clauses |
| //private int requiredMask = 0; |
| private final int minNrShouldMatch; |
| private int end; |
| private Bucket current; |
| // Any time a prohibited clause matches we set bit 0: |
| private static final int PROHIBITED_MASK = 1; |
| |
| BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch, |
| List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException { |
| super(weight); |
| this.minNrShouldMatch = minNrShouldMatch; |
| |
| if (optionalScorers != null && optionalScorers.size() > 0) { |
| for (Scorer scorer : optionalScorers) { |
| if (scorer.nextDoc() != NO_MORE_DOCS) { |
| scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers); |
| } |
| } |
| } |
| |
| if (prohibitedScorers != null && prohibitedScorers.size() > 0) { |
| for (Scorer scorer : prohibitedScorers) { |
| if (scorer.nextDoc() != NO_MORE_DOCS) { |
| scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers); |
| } |
| } |
| } |
| |
| coordFactors = new float[optionalScorers.size() + 1]; |
| for (int i = 0; i < coordFactors.length; i++) { |
| coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord); |
| } |
| } |
| |
| // firstDocID is ignored since nextDoc() initializes 'current' |
| @Override |
| public boolean score(Collector collector, int max, int firstDocID) throws IOException { |
| // Make sure it's only BooleanScorer that calls us: |
| assert firstDocID == -1; |
| boolean more; |
| Bucket tmp; |
| BucketScorer bs = new BucketScorer(weight); |
| |
| // The internal loop will set the score and doc before calling collect. |
| collector.setScorer(bs); |
| do { |
| bucketTable.first = null; |
| |
| while (current != null) { // more queued |
| |
| // check prohibited & required |
| if ((current.bits & PROHIBITED_MASK) == 0) { |
| |
| // TODO: re-enable this if BQ ever sends us required |
| // clauses |
| //&& (current.bits & requiredMask) == requiredMask) { |
| |
| // NOTE: Lucene always passes max = |
| // Integer.MAX_VALUE today, because we never embed |
| // a BooleanScorer inside another (even though |
| // that should work)... but in theory an outside |
| // app could pass a different max so we must check |
| // it: |
| if (current.doc >= max){ |
| tmp = current; |
| current = current.next; |
| tmp.next = bucketTable.first; |
| bucketTable.first = tmp; |
| continue; |
| } |
| |
| if (current.coord >= minNrShouldMatch) { |
| bs.score = current.score * coordFactors[current.coord]; |
| bs.doc = current.doc; |
| bs.freq = current.coord; |
| collector.collect(current.doc); |
| } |
| } |
| |
| current = current.next; // pop the queue |
| } |
| |
| if (bucketTable.first != null){ |
| current = bucketTable.first; |
| bucketTable.first = current.next; |
| return true; |
| } |
| |
| // refill the queue |
| more = false; |
| end += BucketTable.SIZE; |
| for (SubScorer sub = scorers; sub != null; sub = sub.next) { |
| int subScorerDocID = sub.scorer.docID(); |
| if (subScorerDocID != NO_MORE_DOCS) { |
| more |= sub.scorer.score(sub.collector, end, subScorerDocID); |
| } |
| } |
| current = bucketTable.first; |
| |
| } while (current != null || more); |
| |
| return false; |
| } |
| |
| @Override |
| public int advance(int target) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int docID() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int nextDoc() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public float score() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int freq() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long cost() { |
| return Integer.MAX_VALUE; |
| } |
| |
| @Override |
| public void score(Collector collector) throws IOException { |
| score(collector, Integer.MAX_VALUE, -1); |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder buffer = new StringBuilder(); |
| buffer.append("boolean("); |
| for (SubScorer sub = scorers; sub != null; sub = sub.next) { |
| buffer.append(sub.scorer.toString()); |
| buffer.append(" "); |
| } |
| buffer.append(")"); |
| return buffer.toString(); |
| } |
| |
| @Override |
| public Collection<ChildScorer> getChildren() { |
| throw new UnsupportedOperationException(); |
| } |
| } |