lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java - lucene-solr - Git at Google

 package org.apache.lucene.search;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;

 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.search.BooleanQuery.BooleanWeight;

 /* Description from Doug Cutting (excerpted from
  * LUCENE-1483):
  *
  * BooleanScorer uses an array to score windows of
  * 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
  * etc. For each window it iterates through all query terms
  * and accumulates a score in table[doc%2K]. It also stores
  * in the table a bitmask representing which terms
  * contributed to the score. Non-zero scores are chained in
  * a linked list. At the end of scoring each window it then
  * iterates through the linked list and, if the bitmask
  * matches the boolean constraints, collects a hit. For
  * boolean queries with lots of frequent terms this can be
  * much faster, since it does not need to update a priority
  * queue for each posting, instead performing constant-time
  * operations per posting. The only downside is that it
  * results in hits being delivered out-of-order within the
  * window, which means it cannot be nested within other
  * scorers. But it works well as a top-level scorer.
  *
  * The new BooleanScorer2 implementation instead works by
  * merging priority queues of postings, albeit with some
  * clever tricks. For example, a pure conjunction (all terms
  * required) does not require a priority queue. Instead it
  * sorts the posting streams at the start, then repeatedly
  * skips the first to the last. If the first ever equals
  * the last, then there's a hit. When some terms are
  * required and some terms are optional, the conjunction can
  * be evaluated first, then the optional terms can all skip
  * to the match and be added to the score. Thus the
  * conjunction can reduce the number of priority queue
  * updates for the optional terms. */

 /**
  * Bulk scorer for a boolean query built from optional and prohibited clauses
  * only; required clauses are rejected by {@link SubScorer}'s constructor.
  *
  * <p>Matches are gathered one window of {@link BucketTable#SIZE} doc IDs at a
  * time into a {@link BucketTable} and then replayed to the caller's
  * {@link Collector}. Only the {@code score(Collector ...)} entry points are
  * usable: {@code advance}, {@code docID}, {@code nextDoc}, {@code score()},
  * {@code freq} and {@code getChildren} all throw
  * {@link UnsupportedOperationException}.
  */
 final class BooleanScorer extends Scorer {

   /**
    * Collector handed to a single sub-scorer. Every hit it receives is folded
    * into the shared {@link BucketTable}, tagged with this collector's mask.
    */
   private static final class BooleanScorerCollector extends Collector {
     private final BucketTable bucketTable;
     private final int mask;
     // Source of the score for the doc being collected; installed by setScorer.
     private Scorer scorer;

     public BooleanScorerCollector(int mask, BucketTable bucketTable) {
       this.mask = mask;
       this.bucketTable = bucketTable;
     }

     /**
      * Accumulates one hit into the bucket at index {@code doc & BucketTable.MASK}.
      * A bucket whose stored doc differs from {@code doc} is reset and pushed
      * onto the table's valid list; otherwise score, bits and coord are added to.
      */
     @Override
     public void collect(final int doc) throws IOException {
       final BucketTable table = bucketTable;
       final int i = doc & BucketTable.MASK;
       final Bucket bucket = table.buckets[i];

       if (bucket.doc != doc) {                    // invalid bucket
         bucket.doc = doc;                         // set doc
         bucket.score = scorer.score();            // initialize score
         bucket.bits = mask;                       // initialize mask
         bucket.coord = 1;                         // initialize coord

         bucket.next = table.first;                // push onto valid list
         table.first = bucket;
       } else {                                    // valid bucket
         bucket.score += scorer.score();           // increment score
         bucket.bits |= mask;                      // add bits in mask
         bucket.coord++;                           // increment coord
       }
     }

     @Override
     public void setNextReader(AtomicReaderContext context) {
       // not needed by this implementation
     }

     @Override
     public void setScorer(Scorer scorer) {
       this.scorer = scorer;
     }

     @Override
     public boolean acceptsDocsOutOfOrder() {
       return true;
     }

   }

   // An internal class which is used in score(Collector, int, int) to expose
   // the current hit. This is required since Collector exposes a setScorer
   // method and implementations that need the score will call scorer.score().
   // It simply reports the doc, score and freq fields that the scoring loop
   // assigns before each collect(); advance() and nextDoc() always return
   // NO_MORE_DOCS and cost() returns 1.
   private static final class BucketScorer extends Scorer {

     double score;
     int doc = NO_MORE_DOCS;
     int freq;

     public BucketScorer(Weight weight) { super(weight); }

     @Override
     public int advance(int target) { return NO_MORE_DOCS; }

     @Override
     public int docID() { return doc; }

     @Override
     public int freq() { return freq; }

     @Override
     public float score() { return (float)score; }

     @Override
     public int nextDoc() { return NO_MORE_DOCS; }

     @Override
     public long cost() { return 1; }

   }

   /** Accumulated state for one document inside the current window. */
   static final class Bucket {
     int doc = -1;            // tells if bucket is valid
     double score;             // incremental score
     // TODO: break out bool anyProhibited, int
     // numRequiredMatched; then we can remove 32 limit on
     // required clauses
     int bits;                // used for bool constraints
     int coord;               // count of terms in score
     Bucket next;             // next valid bucket
   }

   /** A simple hash table of document scores within a range. */
   static final class BucketTable {
     public static final int SIZE = 1 << 11;
     public static final int MASK = SIZE - 1;

     final Bucket[] buckets = new Bucket[SIZE];
     Bucket first = null;                          // head of valid list

     public BucketTable() {
       // Pre-fill to save the lazy init when collecting
       // each sub:
       for (int idx = 0; idx < SIZE; idx++) {
         buckets[idx] = new Bucket();
       }
     }

     /** Returns a collector that writes into this table using {@code mask}. */
     public Collector newCollector(int mask) {
       return new BooleanScorerCollector(mask, this);
     }

     public int size() { return SIZE; }
   }

   /** One clause scorer plus its table collector, chained as a linked list. */
   static final class SubScorer {
     public Scorer scorer;
     // TODO: re-enable this if BQ ever sends us required clauses
     //public boolean required = false;
     public boolean prohibited;
     public Collector collector;
     public SubScorer next;

     /**
      * @throws IllegalArgumentException if {@code required} is true
      */
     public SubScorer(Scorer scorer, boolean required, boolean prohibited,
         Collector collector, SubScorer next) {
       if (required) {
         throw new IllegalArgumentException("this scorer cannot handle required=true");
       }
       this.scorer = scorer;
       // TODO: re-enable this if BQ ever sends us required clauses
       //this.required = required;
       this.prohibited = prohibited;
       this.collector = collector;
       this.next = next;
     }
   }

   private SubScorer scorers = null;
   private final BucketTable bucketTable = new BucketTable();
   private final float[] coordFactors;
   // TODO: re-enable this if BQ ever sends us required clauses
   //private int requiredMask = 0;
   private final int minNrShouldMatch;
   private int end;
   private Bucket current;
   // Any time a prohibited clause matches we set bit 0:
   private static final int PROHIBITED_MASK = 1;

   /**
    * Registers every optional and prohibited scorer that has at least one
    * document, and precomputes one coord factor per possible match count.
    *
    * @param weight the weight passed to {@link Scorer}; also supplies
    *        {@code coord(i, maxCoord)} unless {@code disableCoord} is set
    * @param disableCoord when true every coord factor is {@code 1.0f}
    * @param minNrShouldMatch minimum bucket coord a document needs to be collected
    * @param optionalScorers scorers of the optional clauses; may be null or empty
    * @param prohibitedScorers scorers of the prohibited clauses; may be null or empty
    * @param maxCoord second argument forwarded to {@code weight.coord}
    * @throws IOException if advancing a scorer to its first document fails
    */
   BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
     super(weight);
     this.minNrShouldMatch = minNrShouldMatch;

     addSubScorers(optionalScorers, false, 0);
     addSubScorers(prohibitedScorers, true, PROHIBITED_MASK);

     // The lists are allowed to be null above, so the table size must not
     // dereference optionalScorers unconditionally. It is sized from the full
     // list, including scorers that were dropped because they had no documents.
     final int numOptional = optionalScorers == null ? 0 : optionalScorers.size();
     coordFactors = new float[numOptional + 1];
     for (int i = 0; i < coordFactors.length; i++) {
       coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord);
     }
   }

   // Positions each candidate on its first document and prepends the ones that
   // are not exhausted to the 'scorers' list; a null list adds nothing.
   private void addSubScorers(List<Scorer> candidates, boolean prohibited, int mask) throws IOException {
     if (candidates == null) {
       return;
     }
     for (Scorer scorer : candidates) {
       if (scorer.nextDoc() != NO_MORE_DOCS) {
         scorers = new SubScorer(scorer, false, prohibited, bucketTable.newCollector(mask), scorers);
       }
     }
   }

   /**
    * Replays queued buckets to {@code collector} and refills the queue window
    * by window. Progress is kept in the {@code current} and {@code end} fields,
    * so a later call resumes where this one stopped.
    *
    * @param collector receives every non-prohibited document whose coord is at
    *        least {@code minNrShouldMatch}
    * @param max documents with {@code doc >= max} are held back, not collected
    * @param firstDocID must be -1 (asserted); it is otherwise unused
    * @return true if buckets were held back because of {@code max}; false once
    *         the queue is empty and no sub-scorer reported more documents
    */
   @Override
   public boolean score(Collector collector, int max, int firstDocID) throws IOException {
     // Make sure it's only BooleanScorer that calls us:
     assert firstDocID == -1;
     boolean more;
     BucketScorer bs = new BucketScorer(weight);

     // The internal loop will set the score and doc before calling collect.
     collector.setScorer(bs);
     do {
       bucketTable.first = null;

       while (current != null) {         // more queued

         // check prohibited & required
         if ((current.bits & PROHIBITED_MASK) == 0) {

           // TODO: re-enable this if BQ ever sends us required
           // clauses
           //&& (current.bits & requiredMask) == requiredMask) {

           // NOTE: Lucene always passes max =
           // Integer.MAX_VALUE today, because we never embed
           // a BooleanScorer inside another (even though
           // that should work)... but in theory an outside
           // app could pass a different max so we must check
           // it:
           if (current.doc >= max) {
             // Move the bucket onto the table's list so it survives this call.
             final Bucket deferred = current;
             current = current.next;
             deferred.next = bucketTable.first;
             bucketTable.first = deferred;
             continue;
           }

           if (current.coord >= minNrShouldMatch) {
             bs.score = current.score * coordFactors[current.coord];
             bs.doc = current.doc;
             bs.freq = current.coord;
             collector.collect(current.doc);
           }
         }

         current = current.next;         // pop the queue
       }

       if (bucketTable.first != null) {
         // Held-back buckets become the queue for the next call.
         current = bucketTable.first;
         bucketTable.first = current.next;
         return true;
       }

       // refill the queue
       more = false;
       end += BucketTable.SIZE;
       for (SubScorer sub = scorers; sub != null; sub = sub.next) {
         int subScorerDocID = sub.scorer.docID();
         if (subScorerDocID != NO_MORE_DOCS) {
           more |= sub.scorer.score(sub.collector, end, subScorerDocID);
         }
       }
       current = bucketTable.first;

     } while (current != null || more);

     return false;
   }

   /** Not supported by this scorer. */
   @Override
   public int advance(int target) {
     throw new UnsupportedOperationException();
   }

   /** Not supported by this scorer. */
   @Override
   public int docID() {
     throw new UnsupportedOperationException();
   }

   /** Not supported by this scorer. */
   @Override
   public int nextDoc() {
     throw new UnsupportedOperationException();
   }

   /** Not supported by this scorer. */
   @Override
   public float score() {
     throw new UnsupportedOperationException();
   }

   /** Not supported by this scorer. */
   @Override
   public int freq() throws IOException {
     throw new UnsupportedOperationException();
   }

   @Override
   public long cost() {
     return Integer.MAX_VALUE;
   }

   /** Scores all documents: delegates with {@code max = Integer.MAX_VALUE}. */
   @Override
   public void score(Collector collector) throws IOException {
     score(collector, Integer.MAX_VALUE, -1);
   }

   /** Renders as {@code boolean(} + each sub-scorer followed by a space + {@code )}. */
   @Override
   public String toString() {
     StringBuilder buffer = new StringBuilder();
     buffer.append("boolean(");
     for (SubScorer sub = scorers; sub != null; sub = sub.next) {
       buffer.append(sub.scorer.toString());
       buffer.append(" ");
     }
     buffer.append(")");
     return buffer.toString();
   }

   /** Not supported by this scorer. */
   @Override
   public Collection<ChildScorer> getChildren() {
     throw new UnsupportedOperationException();
   }
 }
	package org.apache.lucene.search;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.List;

	import org.apache.lucene.index.AtomicReaderContext;
	import org.apache.lucene.search.BooleanQuery.BooleanWeight;

	/* Description from Doug Cutting (excerpted from
	* LUCENE-1483):
	*
	* BooleanScorer uses an array to score windows of
	* 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
	* etc. For each window it iterates through all query terms
	* and accumulates a score in table[doc%2K]. It also stores
	* in the table a bitmask representing which terms
	* contributed to the score. Non-zero scores are chained in
	* a linked list. At the end of scoring each window it then
	* iterates through the linked list and, if the bitmask
	* matches the boolean constraints, collects a hit. For
	* boolean queries with lots of frequent terms this can be
	* much faster, since it does not need to update a priority
	* queue for each posting, instead performing constant-time
	* operations per posting. The only downside is that it
	* results in hits being delivered out-of-order within the
	* window, which means it cannot be nested within other
	* scorers. But it works well as a top-level scorer.
	*
	* The new BooleanScorer2 implementation instead works by
	* merging priority queues of postings, albeit with some
	* clever tricks. For example, a pure conjunction (all terms
	* required) does not require a priority queue. Instead it
	* sorts the posting streams at the start, then repeatedly
	* skips the first to the last. If the first ever equals
	* the last, then there's a hit. When some terms are
	* required and some terms are optional, the conjunction can
	* be evaluated first, then the optional terms can all skip
	* to the match and be added to the score. Thus the
	* conjunction can reduce the number of priority queue
	* updates for the optional terms. */

	final class BooleanScorer extends Scorer {

	private static final class BooleanScorerCollector extends Collector {
	private BucketTable bucketTable;
	private int mask;
	private Scorer scorer;

	public BooleanScorerCollector(int mask, BucketTable bucketTable) {
	this.mask = mask;
	this.bucketTable = bucketTable;
	}

	@Override
	public void collect(final int doc) throws IOException {
	final BucketTable table = bucketTable;
	final int i = doc & BucketTable.MASK;
	final Bucket bucket = table.buckets[i];

	if (bucket.doc != doc) { // invalid bucket
	bucket.doc = doc; // set doc
	bucket.score = scorer.score(); // initialize score
	bucket.bits = mask; // initialize mask
	bucket.coord = 1; // initialize coord

	bucket.next = table.first; // push onto valid list
	table.first = bucket;
	} else { // valid bucket
	bucket.score += scorer.score(); // increment score
	bucket.bits \|= mask; // add bits in mask
	bucket.coord++; // increment coord
	}
	}

	@Override
	public void setNextReader(AtomicReaderContext context) {
	// not needed by this implementation
	}

	@Override
	public void setScorer(Scorer scorer) {
	this.scorer = scorer;
	}

	@Override
	public boolean acceptsDocsOutOfOrder() {
	return true;
	}

	}

	// An internal class which is used in score(Collector, int) for setting the
	// current score. This is required since Collector exposes a setScorer method
	// and implementations that need the score will call scorer.score().
	// Therefore the only methods that are implemented are score() and doc().
	private static final class BucketScorer extends Scorer {

	double score;
	int doc = NO_MORE_DOCS;
	int freq;

	public BucketScorer(Weight weight) { super(weight); }

	@Override
	public int advance(int target) { return NO_MORE_DOCS; }

	@Override
	public int docID() { return doc; }

	@Override
	public int freq() { return freq; }

	@Override
	public int nextDoc() { return NO_MORE_DOCS; }

	@Override
	public float score() { return (float)score; }

	@Override
	public long cost() { return 1; }

	}

	static final class Bucket {
	int doc = -1; // tells if bucket is valid
	double score; // incremental score
	// TODO: break out bool anyProhibited, int
	// numRequiredMatched; then we can remove 32 limit on
	// required clauses
	int bits; // used for bool constraints
	int coord; // count of terms in score
	Bucket next; // next valid bucket
	}

	/** A simple hash table of document scores within a range. */
	static final class BucketTable {
	public static final int SIZE = 1 << 11;
	public static final int MASK = SIZE - 1;

	final Bucket[] buckets = new Bucket[SIZE];
	Bucket first = null; // head of valid list

	public BucketTable() {
	// Pre-fill to save the lazy init when collecting
	// each sub:
	for(int idx=0;idx<SIZE;idx++) {
	buckets[idx] = new Bucket();
	}
	}

	public Collector newCollector(int mask) {
	return new BooleanScorerCollector(mask, this);
	}

	public int size() { return SIZE; }
	}

	static final class SubScorer {
	public Scorer scorer;
	// TODO: re-enable this if BQ ever sends us required clauses
	//public boolean required = false;
	public boolean prohibited;
	public Collector collector;
	public SubScorer next;

	public SubScorer(Scorer scorer, boolean required, boolean prohibited,
	Collector collector, SubScorer next) {
	if (required) {
	throw new IllegalArgumentException("this scorer cannot handle required=true");
	}
	this.scorer = scorer;
	// TODO: re-enable this if BQ ever sends us required clauses
	//this.required = required;
	this.prohibited = prohibited;
	this.collector = collector;
	this.next = next;
	}
	}

	private SubScorer scorers = null;
	private BucketTable bucketTable = new BucketTable();
	private final float[] coordFactors;
	// TODO: re-enable this if BQ ever sends us required clauses
	//private int requiredMask = 0;
	private final int minNrShouldMatch;
	private int end;
	private Bucket current;
	// Any time a prohibited clause matches we set bit 0:
	private static final int PROHIBITED_MASK = 1;

	BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch,
	List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
	super(weight);
	this.minNrShouldMatch = minNrShouldMatch;

	if (optionalScorers != null && optionalScorers.size() > 0) {
	for (Scorer scorer : optionalScorers) {
	if (scorer.nextDoc() != NO_MORE_DOCS) {
	scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
	}
	}
	}

	if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
	for (Scorer scorer : prohibitedScorers) {
	if (scorer.nextDoc() != NO_MORE_DOCS) {
	scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
	}
	}
	}

	coordFactors = new float[optionalScorers.size() + 1];
	for (int i = 0; i < coordFactors.length; i++) {
	coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord);
	}
	}

	// firstDocID is ignored since nextDoc() initializes 'current'
	@Override
	public boolean score(Collector collector, int max, int firstDocID) throws IOException {
	// Make sure it's only BooleanScorer that calls us:
	assert firstDocID == -1;
	boolean more;
	Bucket tmp;
	BucketScorer bs = new BucketScorer(weight);

	// The internal loop will set the score and doc before calling collect.
	collector.setScorer(bs);
	do {
	bucketTable.first = null;

	while (current != null) { // more queued

	// check prohibited & required
	if ((current.bits & PROHIBITED_MASK) == 0) {

	// TODO: re-enable this if BQ ever sends us required
	// clauses
	//&& (current.bits & requiredMask) == requiredMask) {

	// NOTE: Lucene always passes max =
	// Integer.MAX_VALUE today, because we never embed
	// a BooleanScorer inside another (even though
	// that should work)... but in theory an outside
	// app could pass a different max so we must check
	// it:
	if (current.doc >= max){
	tmp = current;
	current = current.next;
	tmp.next = bucketTable.first;
	bucketTable.first = tmp;
	continue;
	}

	if (current.coord >= minNrShouldMatch) {
	bs.score = current.score * coordFactors[current.coord];
	bs.doc = current.doc;
	bs.freq = current.coord;
	collector.collect(current.doc);
	}
	}

	current = current.next; // pop the queue
	}

	if (bucketTable.first != null){
	current = bucketTable.first;
	bucketTable.first = current.next;
	return true;
	}

	// refill the queue
	more = false;
	end += BucketTable.SIZE;
	for (SubScorer sub = scorers; sub != null; sub = sub.next) {
	int subScorerDocID = sub.scorer.docID();
	if (subScorerDocID != NO_MORE_DOCS) {
	more \|= sub.scorer.score(sub.collector, end, subScorerDocID);
	}
	}
	current = bucketTable.first;

	} while (current != null \|\| more);

	return false;
	}

	@Override
	public int advance(int target) {
	throw new UnsupportedOperationException();
	}

	@Override
	public int docID() {
	throw new UnsupportedOperationException();
	}

	@Override
	public int nextDoc() {
	throw new UnsupportedOperationException();
	}

	@Override
	public float score() {
	throw new UnsupportedOperationException();
	}

	@Override
	public int freq() throws IOException {
	throw new UnsupportedOperationException();
	}

	@Override
	public long cost() {
	return Integer.MAX_VALUE;
	}

	@Override
	public void score(Collector collector) throws IOException {
	score(collector, Integer.MAX_VALUE, -1);
	}

	@Override
	public String toString() {
	StringBuilder buffer = new StringBuilder();
	buffer.append("boolean(");
	for (SubScorer sub = scorers; sub != null; sub = sub.next) {
	buffer.append(sub.scorer.toString());
	buffer.append(" ");
	}
	buffer.append(")");
	return buffer.toString();
	}

	@Override
	public Collection<ChildScorer> getChildren() {
	throw new UnsupportedOperationException();
	}
	}