lucene/backwards/src/java/org/apache/lucene/search/BooleanScorer.java - lucene-solr - Git at Google

 package org.apache.lucene.search;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.List;

 import org.apache.lucene.index.IndexReader;

 /* Description from Doug Cutting (excerpted from
  * LUCENE-1483):
  *
  * BooleanScorer uses a ~16k array to score windows of
  * docs. So it scores docs 0-16k first, then docs 16-32k,
  * etc. For each window it iterates through all query terms
  * and accumulates a score in table[doc%16k]. It also stores
  * in the table a bitmask representing which terms
  * contributed to the score. Non-zero scores are chained in
  * a linked list. At the end of scoring each window it then
  * iterates through the linked list and, if the bitmask
  * matches the boolean constraints, collects a hit. For
  * boolean queries with lots of frequent terms this can be
  * much faster, since it does not need to update a priority
  * queue for each posting, instead performing constant-time
  * operations per posting. The only downside is that it
  * results in hits being delivered out-of-order within the
  * window, which means it cannot be nested within other
  * scorers. But it works well as a top-level scorer.
  *
  * The new BooleanScorer2 implementation instead works by
  * merging priority queues of postings, albeit with some
  * clever tricks. For example, a pure conjunction (all terms
  * required) does not require a priority queue. Instead it
  * sorts the posting streams at the start, then repeatedly
  * skips the first to to the last. If the first ever equals
  * the last, then there's a hit. When some terms are
  * required and some terms are optional, the conjunction can
  * be evaluated first, then the optional terms can all skip
  * to the match and be added to the score. Thus the
  * conjunction can reduce the number of priority queue
  * updates for the optional terms. */

 final class BooleanScorer extends Scorer {

   private static final class BooleanScorerCollector extends Collector {
     private BucketTable bucketTable;
     private int mask;
     private Scorer scorer;

     public BooleanScorerCollector(int mask, BucketTable bucketTable) {
       this.mask = mask;
       this.bucketTable = bucketTable;
     }

     @Override
     public final void collect(final int doc) throws IOException {
       final BucketTable table = bucketTable;
       final int i = doc & BucketTable.MASK;
       Bucket bucket = table.buckets[i];
       if (bucket == null)
         table.buckets[i] = bucket = new Bucket();

       if (bucket.doc != doc) {                    // invalid bucket
         bucket.doc = doc;                         // set doc
         bucket.score = scorer.score();            // initialize score
         bucket.bits = mask;                       // initialize mask
         bucket.coord = 1;                         // initialize coord

         bucket.next = table.first;                // push onto valid list
         table.first = bucket;
       } else {                                    // valid bucket
         bucket.score += scorer.score();           // increment score
         bucket.bits |= mask;                      // add bits in mask
         bucket.coord++;                           // increment coord
       }
     }

     @Override
     public void setNextReader(IndexReader reader, int docBase) {
       // not needed by this implementation
     }

     @Override
     public void setScorer(Scorer scorer) throws IOException {
       this.scorer = scorer;
     }

     @Override
     public boolean acceptsDocsOutOfOrder() {
       return true;
     }

   }

   // An internal class which is used in score(Collector, int) for setting the
   // current score. This is required since Collector exposes a setScorer method
   // and implementations that need the score will call scorer.score().
   // Therefore the only methods that are implemented are score() and doc().
   private static final class BucketScorer extends Scorer {

     float score;
     int doc = NO_MORE_DOCS;

     public BucketScorer() { super(null); }

     @Override
     public int advance(int target) throws IOException { return NO_MORE_DOCS; }

     @Override
     public int docID() { return doc; }

     @Override
     public int nextDoc() throws IOException { return NO_MORE_DOCS; }

     @Override
     public float score() throws IOException { return score; }

   }

   static final class Bucket {
     int doc = -1;            // tells if bucket is valid
     float score;             // incremental score
     int bits;                // used for bool constraints
     int coord;               // count of terms in score
     Bucket next;             // next valid bucket
   }

   /** A simple hash table of document scores within a range. */
   static final class BucketTable {
     public static final int SIZE = 1 << 11;
     public static final int MASK = SIZE - 1;

     final Bucket[] buckets = new Bucket[SIZE];
     Bucket first = null;                          // head of valid list

     public BucketTable() {}

     public Collector newCollector(int mask) {
       return new BooleanScorerCollector(mask, this);
     }

     public final int size() { return SIZE; }
   }

   static final class SubScorer {
     public Scorer scorer;
     public boolean required = false;
     public boolean prohibited = false;
     public Collector collector;
     public SubScorer next;

     public SubScorer(Scorer scorer, boolean required, boolean prohibited,
         Collector collector, SubScorer next)
       throws IOException {
       this.scorer = scorer;
       this.required = required;
       this.prohibited = prohibited;
       this.collector = collector;
       this.next = next;
     }
   }

   private SubScorer scorers = null;
   private BucketTable bucketTable = new BucketTable();
   private int maxCoord = 1;
   private final float[] coordFactors;
   private int requiredMask = 0;
   private int prohibitedMask = 0;
   private int nextMask = 1;
   private final int minNrShouldMatch;
   private int end;
   private Bucket current;
   private int doc = -1;

   BooleanScorer(Similarity similarity, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers) throws IOException {
     super(similarity);
     this.minNrShouldMatch = minNrShouldMatch;

     if (optionalScorers != null && optionalScorers.size() > 0) {
       for (Scorer scorer : optionalScorers) {
         maxCoord++;
         if (scorer.nextDoc() != NO_MORE_DOCS) {
           scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
         }
       }
     }

     if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
       for (Scorer scorer : prohibitedScorers) {
         int mask = nextMask;
         nextMask = nextMask << 1;
         prohibitedMask |= mask;                     // update prohibited mask
         if (scorer.nextDoc() != NO_MORE_DOCS) {
           scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(mask), scorers);
         }
       }
     }

     coordFactors = new float[maxCoord];
     Similarity sim = getSimilarity();
     for (int i = 0; i < maxCoord; i++) {
       coordFactors[i] = sim.coord(i, maxCoord - 1);
     }
   }

   // firstDocID is ignored since nextDoc() initializes 'current'
   @Override
   protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
     boolean more;
     Bucket tmp;
     BucketScorer bs = new BucketScorer();
     // The internal loop will set the score and doc before calling collect.
     collector.setScorer(bs);
     do {
       bucketTable.first = null;

       while (current != null) {         // more queued

         // check prohibited & required
         if ((current.bits & prohibitedMask) == 0 &&
             (current.bits & requiredMask) == requiredMask) {

           if (current.doc >= max){
             tmp = current;
             current = current.next;
             tmp.next = bucketTable.first;
             bucketTable.first = tmp;
             continue;
           }

           if (current.coord >= minNrShouldMatch) {
             bs.score = current.score * coordFactors[current.coord];
             bs.doc = current.doc;
             collector.collect(current.doc);
           }
         }

         current = current.next;         // pop the queue
       }

       if (bucketTable.first != null){
         current = bucketTable.first;
         bucketTable.first = current.next;
         return true;
       }

       // refill the queue
       more = false;
       end += BucketTable.SIZE;
       for (SubScorer sub = scorers; sub != null; sub = sub.next) {
         int subScorerDocID = sub.scorer.docID();
         if (subScorerDocID != NO_MORE_DOCS) {
           more |= sub.scorer.score(sub.collector, end, subScorerDocID);
         }
       }
       current = bucketTable.first;

     } while (current != null || more);

     return false;
   }

   @Override
   public int advance(int target) throws IOException {
     throw new UnsupportedOperationException();
   }

   @Override
   public int docID() {
     return doc;
   }

   @Override
   public int nextDoc() throws IOException {
     boolean more;
     do {
       while (bucketTable.first != null) {         // more queued
         current = bucketTable.first;
         bucketTable.first = current.next;         // pop the queue

         // check prohibited & required, and minNrShouldMatch
         if ((current.bits & prohibitedMask) == 0 &&
             (current.bits & requiredMask) == requiredMask &&
             current.coord >= minNrShouldMatch) {
           return doc = current.doc;
         }
       }

       // refill the queue
       more = false;
       end += BucketTable.SIZE;
       for (SubScorer sub = scorers; sub != null; sub = sub.next) {
         Scorer scorer = sub.scorer;
         sub.collector.setScorer(scorer);
         int doc = scorer.docID();
         while (doc < end) {
           sub.collector.collect(doc);
           doc = scorer.nextDoc();
         }
         more |= (doc != NO_MORE_DOCS);
       }
     } while (bucketTable.first != null || more);

     return doc = NO_MORE_DOCS;
   }

   @Override
   public float score() {
     return current.score * coordFactors[current.coord];
   }

   @Override
   public void score(Collector collector) throws IOException {
     score(collector, Integer.MAX_VALUE, nextDoc());
   }

   @Override
   public String toString() {
     StringBuilder buffer = new StringBuilder();
     buffer.append("boolean(");
     for (SubScorer sub = scorers; sub != null; sub = sub.next) {
       buffer.append(sub.scorer.toString());
       buffer.append(" ");
     }
     buffer.append(")");
     return buffer.toString();
   }

 }
	package org.apache.lucene.search;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.List;

	import org.apache.lucene.index.IndexReader;

	/* Description from Doug Cutting (excerpted from
	* LUCENE-1483):
	*
	* BooleanScorer uses a ~16k array to score windows of
	* docs. So it scores docs 0-16k first, then docs 16-32k,
	* etc. For each window it iterates through all query terms
	* and accumulates a score in table[doc%16k]. It also stores
	* in the table a bitmask representing which terms
	* contributed to the score. Non-zero scores are chained in
	* a linked list. At the end of scoring each window it then
	* iterates through the linked list and, if the bitmask
	* matches the boolean constraints, collects a hit. For
	* boolean queries with lots of frequent terms this can be
	* much faster, since it does not need to update a priority
	* queue for each posting, instead performing constant-time
	* operations per posting. The only downside is that it
	* results in hits being delivered out-of-order within the
	* window, which means it cannot be nested within other
	* scorers. But it works well as a top-level scorer.
	*
	* The new BooleanScorer2 implementation instead works by
	* merging priority queues of postings, albeit with some
	* clever tricks. For example, a pure conjunction (all terms
	* required) does not require a priority queue. Instead it
	* sorts the posting streams at the start, then repeatedly
	* skips the first to to the last. If the first ever equals
	* the last, then there's a hit. When some terms are
	* required and some terms are optional, the conjunction can
	* be evaluated first, then the optional terms can all skip
	* to the match and be added to the score. Thus the
	* conjunction can reduce the number of priority queue
	* updates for the optional terms. */

	final class BooleanScorer extends Scorer {

	private static final class BooleanScorerCollector extends Collector {
	private BucketTable bucketTable;
	private int mask;
	private Scorer scorer;

	public BooleanScorerCollector(int mask, BucketTable bucketTable) {
	this.mask = mask;
	this.bucketTable = bucketTable;
	}

	@Override
	public final void collect(final int doc) throws IOException {
	final BucketTable table = bucketTable;
	final int i = doc & BucketTable.MASK;
	Bucket bucket = table.buckets[i];
	if (bucket == null)
	table.buckets[i] = bucket = new Bucket();

	if (bucket.doc != doc) { // invalid bucket
	bucket.doc = doc; // set doc
	bucket.score = scorer.score(); // initialize score
	bucket.bits = mask; // initialize mask
	bucket.coord = 1; // initialize coord

	bucket.next = table.first; // push onto valid list
	table.first = bucket;
	} else { // valid bucket
	bucket.score += scorer.score(); // increment score
	bucket.bits \|= mask; // add bits in mask
	bucket.coord++; // increment coord
	}
	}

	@Override
	public void setNextReader(IndexReader reader, int docBase) {
	// not needed by this implementation
	}

	@Override
	public void setScorer(Scorer scorer) throws IOException {
	this.scorer = scorer;
	}

	@Override
	public boolean acceptsDocsOutOfOrder() {
	return true;
	}

	}

	// An internal class which is used in score(Collector, int) for setting the
	// current score. This is required since Collector exposes a setScorer method
	// and implementations that need the score will call scorer.score().
	// Therefore the only methods that are implemented are score() and doc().
	private static final class BucketScorer extends Scorer {

	float score;
	int doc = NO_MORE_DOCS;

	public BucketScorer() { super(null); }

	@Override
	public int advance(int target) throws IOException { return NO_MORE_DOCS; }

	@Override
	public int docID() { return doc; }

	@Override
	public int nextDoc() throws IOException { return NO_MORE_DOCS; }

	@Override
	public float score() throws IOException { return score; }

	}

	static final class Bucket {
	int doc = -1; // tells if bucket is valid
	float score; // incremental score
	int bits; // used for bool constraints
	int coord; // count of terms in score
	Bucket next; // next valid bucket
	}

	/** A simple hash table of document scores within a range. */
	static final class BucketTable {
	public static final int SIZE = 1 << 11;
	public static final int MASK = SIZE - 1;

	final Bucket[] buckets = new Bucket[SIZE];
	Bucket first = null; // head of valid list

	public BucketTable() {}

	public Collector newCollector(int mask) {
	return new BooleanScorerCollector(mask, this);
	}

	public final int size() { return SIZE; }
	}

	static final class SubScorer {
	public Scorer scorer;
	public boolean required = false;
	public boolean prohibited = false;
	public Collector collector;
	public SubScorer next;

	public SubScorer(Scorer scorer, boolean required, boolean prohibited,
	Collector collector, SubScorer next)
	throws IOException {
	this.scorer = scorer;
	this.required = required;
	this.prohibited = prohibited;
	this.collector = collector;
	this.next = next;
	}
	}

	private SubScorer scorers = null;
	private BucketTable bucketTable = new BucketTable();
	private int maxCoord = 1;
	private final float[] coordFactors;
	private int requiredMask = 0;
	private int prohibitedMask = 0;
	private int nextMask = 1;
	private final int minNrShouldMatch;
	private int end;
	private Bucket current;
	private int doc = -1;

	BooleanScorer(Similarity similarity, int minNrShouldMatch,
	List<Scorer> optionalScorers, List<Scorer> prohibitedScorers) throws IOException {
	super(similarity);
	this.minNrShouldMatch = minNrShouldMatch;

	if (optionalScorers != null && optionalScorers.size() > 0) {
	for (Scorer scorer : optionalScorers) {
	maxCoord++;
	if (scorer.nextDoc() != NO_MORE_DOCS) {
	scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers);
	}
	}
	}

	if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
	for (Scorer scorer : prohibitedScorers) {
	int mask = nextMask;
	nextMask = nextMask << 1;
	prohibitedMask \|= mask; // update prohibited mask
	if (scorer.nextDoc() != NO_MORE_DOCS) {
	scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(mask), scorers);
	}
	}
	}

	coordFactors = new float[maxCoord];
	Similarity sim = getSimilarity();
	for (int i = 0; i < maxCoord; i++) {
	coordFactors[i] = sim.coord(i, maxCoord - 1);
	}
	}

	// firstDocID is ignored since nextDoc() initializes 'current'
	@Override
	protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
	boolean more;
	Bucket tmp;
	BucketScorer bs = new BucketScorer();
	// The internal loop will set the score and doc before calling collect.
	collector.setScorer(bs);
	do {
	bucketTable.first = null;

	while (current != null) { // more queued

	// check prohibited & required
	if ((current.bits & prohibitedMask) == 0 &&
	(current.bits & requiredMask) == requiredMask) {

	if (current.doc >= max){
	tmp = current;
	current = current.next;
	tmp.next = bucketTable.first;
	bucketTable.first = tmp;
	continue;
	}

	if (current.coord >= minNrShouldMatch) {
	bs.score = current.score * coordFactors[current.coord];
	bs.doc = current.doc;
	collector.collect(current.doc);
	}
	}

	current = current.next; // pop the queue
	}

	if (bucketTable.first != null){
	current = bucketTable.first;
	bucketTable.first = current.next;
	return true;
	}

	// refill the queue
	more = false;
	end += BucketTable.SIZE;
	for (SubScorer sub = scorers; sub != null; sub = sub.next) {
	int subScorerDocID = sub.scorer.docID();
	if (subScorerDocID != NO_MORE_DOCS) {
	more \|= sub.scorer.score(sub.collector, end, subScorerDocID);
	}
	}
	current = bucketTable.first;

	} while (current != null \|\| more);

	return false;
	}

	@Override
	public int advance(int target) throws IOException {
	throw new UnsupportedOperationException();
	}

	@Override
	public int docID() {
	return doc;
	}

	@Override
	public int nextDoc() throws IOException {
	boolean more;
	do {
	while (bucketTable.first != null) { // more queued
	current = bucketTable.first;
	bucketTable.first = current.next; // pop the queue

	// check prohibited & required, and minNrShouldMatch
	if ((current.bits & prohibitedMask) == 0 &&
	(current.bits & requiredMask) == requiredMask &&
	current.coord >= minNrShouldMatch) {
	return doc = current.doc;
	}
	}

	// refill the queue
	more = false;
	end += BucketTable.SIZE;
	for (SubScorer sub = scorers; sub != null; sub = sub.next) {
	Scorer scorer = sub.scorer;
	sub.collector.setScorer(scorer);
	int doc = scorer.docID();
	while (doc < end) {
	sub.collector.collect(doc);
	doc = scorer.nextDoc();
	}
	more \|= (doc != NO_MORE_DOCS);
	}
	} while (bucketTable.first != null \|\| more);

	return doc = NO_MORE_DOCS;
	}

	@Override
	public float score() {
	return current.score * coordFactors[current.coord];
	}

	@Override
	public void score(Collector collector) throws IOException {
	score(collector, Integer.MAX_VALUE, nextDoc());
	}

	@Override
	public String toString() {
	StringBuilder buffer = new StringBuilder();
	buffer.append("boolean(");
	for (SubScorer sub = scorers; sub != null; sub = sub.next) {
	buffer.append(sub.scorer.toString());
	buffer.append(" ");
	}
	buffer.append(")");
	return buffer.toString();
	}

	}