lucene/backwards/src/java/org/apache/lucene/search/PhraseScorer.java - lucene-solr - Git at Google

 package org.apache.lucene.search;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;

 import org.apache.lucene.index.TermPositions;

 /**
  * Expert: common scoring machinery shared by phrase-query scorers.
  *
  * <p>A document matches when it holds every term of the phrase query at
  * "valid" positions. For an exact phrase query the terms have to sit in
  * adjacent locations; a sloppy phrase query tolerates some distance between
  * them. Whenever a document containing all of the terms is reached, the
  * abstract {@link #phraseFreq()} hook of the concrete subclass is invoked to
  * compute how often the phrase occurs in it; a non-zero frequency is a match.
  */
 abstract class PhraseScorer extends Scorer {
   /** Weight this scorer was built from; reported by {@link #toString()}. */
   private final Weight weight;
   /** Norm bytes looked up by document id in {@link #score()}; may be null. */
   protected byte[] norms;
   /** Result of {@code weight.getValue()}, captured at construction time. */
   protected float value;

   /** True until the first call to {@link #nextDoc()} or {@link #advance(int)}. */
   private boolean firstTime = true;
   /** Turns false as soon as any term's positions report exhaustion. */
   private boolean more = true;
   /** Queue used by {@code sort()} and {@link #pqToList()} to re-order the list. */
   protected PhraseQueue pq;
   /** Head of the linked list of phrase positions. */
   protected PhrasePositions first;
   /** Tail of the linked list of phrase positions. */
   protected PhrasePositions last;

   /** Phrase frequency in the current doc, as computed by {@link #phraseFreq()}. */
   private float freq;

   /**
    * Builds the scorer and its linked list of phrase positions.
    *
    * @param weight     weight whose value scales every score
    * @param tps        term positions, one entry per phrase term
    * @param offsets    offset of each term inside the phrase, parallel to {@code tps}
    * @param similarity similarity handed to the {@link Scorer} superclass
    * @param norms      norm bytes indexed by document id, or null to skip normalization
    */
   PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets,
       Similarity similarity, byte[] norms) {
     super(similarity);
     this.weight = weight;
     this.value = weight.getValue();
     this.norms = norms;

     // Wrap every TermPositions in a PhrasePositions and chain them together.
     // A phrase position is the term position shifted by the term's offset in
     // the phrase (pp.pos = tp.pos - offset), so an exact phrase match shows
     // up as all PhrasePositions sharing exactly the same position.
     PhrasePositions tail = null;
     for (int i = 0; i < tps.length; i++) {
       PhrasePositions node = new PhrasePositions(tps[i], offsets[i]);
       if (tail == null) {
         first = node;                             // very first element becomes the head
       } else {
         tail.next = node;                         // otherwise append behind the tail
       }
       tail = node;
     }
     last = tail;

     pq = new PhraseQueue(tps.length);             // starts out empty
     first.doc = -1;                               // docID() reports -1 before iteration starts
   }

   /** Returns the doc id currently held by the head of the list. */
   @Override
   public int docID() {
     return first.doc;
   }

   /**
    * Moves to the next matching document.
    *
    * @return the doc id of the match, or {@code NO_MORE_DOCS} when exhausted
    */
   @Override
   public int nextDoc() throws IOException {
     if (!firstTime) {
       if (more) {
         more = last.next();                       // step past the current doc
       }
     } else {
       init();
       firstTime = false;
     }
     return matchOrExhaust();
   }

   /**
    * Runs {@link #doNext()} and, when nothing matched, stamps the head of the
    * list with {@code NO_MORE_DOCS}.
    *
    * @return the doc id held by the head of the list afterwards
    */
   private int matchOrExhaust() throws IOException {
     boolean matched = doNext();
     if (!matched) {
       first.doc = NO_MORE_DOCS;
     }
     return first.doc;
   }

   /**
    * Searches for a match starting at the current state, i.e. without an
    * initial increment.
    *
    * @return true if a document with a non-zero phrase frequency was found
    */
   private boolean doNext() throws IOException {
     while (more) {
       // Line all terms up on one document: keep skipping the head up to the
       // tail's doc and rotating it to the end until head and tail agree.
       while (more && first.doc < last.doc) {
         more = first.skipTo(last.doc);
         firstToLast();
       }
       if (!more) {
         return false;                             // some term ran out of documents
       }

       // Every term is on this document; ask the subclass about the phrase.
       freq = phraseFreq();
       if (freq != 0.0f) {
         return true;                              // phrase found
       }
       more = last.next();                         // no phrase here, keep scanning
     }
     return false;                                 // nothing left to match
   }

   /**
    * Scores the current document: {@code tf(freq) * value}, additionally
    * multiplied by the decoded norm of the document when norms are present.
    */
   @Override
   public float score() throws IOException {
     final float unnormalized = getSimilarity().tf(freq) * value;
     if (norms == null) {
       return unnormalized;
     }
     return unnormalized * Similarity.decodeNorm(norms[first.doc]);
   }

   /**
    * Skips every term to {@code target}, re-sorts the list and then looks for
    * a match from there.
    *
    * @param target doc id passed to {@code skipTo} of every phrase position
    * @return the doc id of the match, or {@code NO_MORE_DOCS} when exhausted
    */
   @Override
   public int advance(int target) throws IOException {
     firstTime = false;
     PhrasePositions cursor = first;
     while (more && cursor != null) {
       more = cursor.skipTo(target);
       cursor = cursor.next;
     }
     if (more) {
       sort();                                     // list order may have changed
     }
     return matchOrExhaust();
   }

   /**
    * Returns the phrase frequency in the current doc, as computed by
    * {@link #phraseFreq()}.
    */
   public final float currentFreq() {
     return freq;
   }

   /**
    * Computes the frequency of the phrase in a document that contains all of
    * the phrase query terms. A non-zero frequency means a match.
    * <br>Containing all terms is not enough for a match: the terms also have
    * to be found in matching locations.
    *
    * @return frequency of the phrase in the current doc, 0 if not found
    */
   protected abstract float phraseFreq() throws IOException;

   /** Moves every term to its first document, then sorts the list. */
   private void init() throws IOException {
     PhrasePositions cursor = first;
     while (more && cursor != null) {
       more = cursor.next();
       cursor = cursor.next;
     }
     if (more) {
       sort();
     }
   }

   /** Re-orders the list by pushing every element through the queue. */
   private void sort() {
     pq.clear();
     PhrasePositions cursor = first;
     while (cursor != null) {
       pq.add(cursor);
       cursor = cursor.next;
     }
     pqToList();
   }

   /** Empties the queue into a freshly built list, in pop order. */
   protected final void pqToList() {
     first = null;
     last = null;
     while (pq.top() != null) {
       PhrasePositions popped = pq.pop();
       if (last == null) {
         first = popped;                           // first popped element is the new head
       } else {
         last.next = popped;                       // later ones go behind the tail
       }
       last = popped;
       popped.next = null;
     }
   }

   /** Detaches the head of the list and re-attaches it as the new tail. */
   protected final void firstToLast() {
     PhrasePositions moved = first;
     last.next = moved;
     first = moved.next;
     last = moved;
     moved.next = null;
   }

   @Override
   public String toString() {
     return new StringBuilder("scorer(").append(weight).append(")").toString();
   }

 }
	package org.apache.lucene.search;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;

	import org.apache.lucene.index.TermPositions;

	/**
	 * Expert: common scoring machinery shared by phrase-query scorers.
	 *
	 * <p>A document matches when it holds every term of the phrase query at
	 * "valid" positions. For an exact phrase query the terms have to sit in
	 * adjacent locations; a sloppy phrase query tolerates some distance between
	 * them. Whenever a document containing all of the terms is reached, the
	 * abstract {@link #phraseFreq()} hook of the concrete subclass is invoked to
	 * compute how often the phrase occurs in it; a non-zero frequency is a match.
	 */
	abstract class PhraseScorer extends Scorer {
	  /** Weight this scorer was built from; reported by {@link #toString()}. */
	  private final Weight weight;
	  /** Norm bytes looked up by document id in {@link #score()}; may be null. */
	  protected byte[] norms;
	  /** Result of {@code weight.getValue()}, captured at construction time. */
	  protected float value;

	  /** True until the first call to {@link #nextDoc()} or {@link #advance(int)}. */
	  private boolean firstTime = true;
	  /** Turns false as soon as any term's positions report exhaustion. */
	  private boolean more = true;
	  /** Queue used by {@code sort()} and {@link #pqToList()} to re-order the list. */
	  protected PhraseQueue pq;
	  /** Head of the linked list of phrase positions. */
	  protected PhrasePositions first;
	  /** Tail of the linked list of phrase positions. */
	  protected PhrasePositions last;

	  /** Phrase frequency in the current doc, as computed by {@link #phraseFreq()}. */
	  private float freq;

	  /**
	   * Builds the scorer and its linked list of phrase positions.
	   *
	   * @param weight     weight whose value scales every score
	   * @param tps        term positions, one entry per phrase term
	   * @param offsets    offset of each term inside the phrase, parallel to {@code tps}
	   * @param similarity similarity handed to the {@link Scorer} superclass
	   * @param norms      norm bytes indexed by document id, or null to skip normalization
	   */
	  PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets,
	      Similarity similarity, byte[] norms) {
	    super(similarity);
	    this.weight = weight;
	    this.value = weight.getValue();
	    this.norms = norms;

	    // Wrap every TermPositions in a PhrasePositions and chain them together.
	    // A phrase position is the term position shifted by the term's offset in
	    // the phrase (pp.pos = tp.pos - offset), so an exact phrase match shows
	    // up as all PhrasePositions sharing exactly the same position.
	    PhrasePositions tail = null;
	    for (int i = 0; i < tps.length; i++) {
	      PhrasePositions node = new PhrasePositions(tps[i], offsets[i]);
	      if (tail == null) {
	        first = node; // very first element becomes the head
	      } else {
	        tail.next = node; // otherwise append behind the tail
	      }
	      tail = node;
	    }
	    last = tail;

	    pq = new PhraseQueue(tps.length); // starts out empty
	    first.doc = -1; // docID() reports -1 before iteration starts
	  }

	  /** Returns the doc id currently held by the head of the list. */
	  @Override
	  public int docID() {
	    return first.doc;
	  }

	  /**
	   * Moves to the next matching document.
	   *
	   * @return the doc id of the match, or {@code NO_MORE_DOCS} when exhausted
	   */
	  @Override
	  public int nextDoc() throws IOException {
	    if (!firstTime) {
	      if (more) {
	        more = last.next(); // step past the current doc
	      }
	    } else {
	      init();
	      firstTime = false;
	    }
	    return matchOrExhaust();
	  }

	  /**
	   * Runs {@link #doNext()} and, when nothing matched, stamps the head of the
	   * list with {@code NO_MORE_DOCS}.
	   *
	   * @return the doc id held by the head of the list afterwards
	   */
	  private int matchOrExhaust() throws IOException {
	    boolean matched = doNext();
	    if (!matched) {
	      first.doc = NO_MORE_DOCS;
	    }
	    return first.doc;
	  }

	  /**
	   * Searches for a match starting at the current state, i.e. without an
	   * initial increment.
	   *
	   * @return true if a document with a non-zero phrase frequency was found
	   */
	  private boolean doNext() throws IOException {
	    while (more) {
	      // Line all terms up on one document: keep skipping the head up to the
	      // tail's doc and rotating it to the end until head and tail agree.
	      while (more && first.doc < last.doc) {
	        more = first.skipTo(last.doc);
	        firstToLast();
	      }
	      if (!more) {
	        return false; // some term ran out of documents
	      }

	      // Every term is on this document; ask the subclass about the phrase.
	      freq = phraseFreq();
	      if (freq != 0.0f) {
	        return true; // phrase found
	      }
	      more = last.next(); // no phrase here, keep scanning
	    }
	    return false; // nothing left to match
	  }

	  /**
	   * Scores the current document: {@code tf(freq) * value}, additionally
	   * multiplied by the decoded norm of the document when norms are present.
	   */
	  @Override
	  public float score() throws IOException {
	    final float unnormalized = getSimilarity().tf(freq) * value;
	    if (norms == null) {
	      return unnormalized;
	    }
	    return unnormalized * Similarity.decodeNorm(norms[first.doc]);
	  }

	  /**
	   * Skips every term to {@code target}, re-sorts the list and then looks for
	   * a match from there.
	   *
	   * @param target doc id passed to {@code skipTo} of every phrase position
	   * @return the doc id of the match, or {@code NO_MORE_DOCS} when exhausted
	   */
	  @Override
	  public int advance(int target) throws IOException {
	    firstTime = false;
	    PhrasePositions cursor = first;
	    while (more && cursor != null) {
	      more = cursor.skipTo(target);
	      cursor = cursor.next;
	    }
	    if (more) {
	      sort(); // list order may have changed
	    }
	    return matchOrExhaust();
	  }

	  /**
	   * Returns the phrase frequency in the current doc, as computed by
	   * {@link #phraseFreq()}.
	   */
	  public final float currentFreq() {
	    return freq;
	  }

	  /**
	   * Computes the frequency of the phrase in a document that contains all of
	   * the phrase query terms. A non-zero frequency means a match.
	   * <br>Containing all terms is not enough for a match: the terms also have
	   * to be found in matching locations.
	   *
	   * @return frequency of the phrase in the current doc, 0 if not found
	   */
	  protected abstract float phraseFreq() throws IOException;

	  /** Moves every term to its first document, then sorts the list. */
	  private void init() throws IOException {
	    PhrasePositions cursor = first;
	    while (more && cursor != null) {
	      more = cursor.next();
	      cursor = cursor.next;
	    }
	    if (more) {
	      sort();
	    }
	  }

	  /** Re-orders the list by pushing every element through the queue. */
	  private void sort() {
	    pq.clear();
	    PhrasePositions cursor = first;
	    while (cursor != null) {
	      pq.add(cursor);
	      cursor = cursor.next;
	    }
	    pqToList();
	  }

	  /** Empties the queue into a freshly built list, in pop order. */
	  protected final void pqToList() {
	    first = null;
	    last = null;
	    while (pq.top() != null) {
	      PhrasePositions popped = pq.pop();
	      if (last == null) {
	        first = popped; // first popped element is the new head
	      } else {
	        last.next = popped; // later ones go behind the tail
	      }
	      last = popped;
	      popped.next = null;
	    }
	  }

	  /** Detaches the head of the list and re-attaches it as the new tail. */
	  protected final void firstToLast() {
	    PhrasePositions moved = first;
	    last.next = moved;
	    first = moved.next;
	    last = moved;
	    moved.next = null;
	  }

	  @Override
	  public String toString() {
	    return new StringBuilder("scorer(").append(weight).append(")").toString();
	  }

	}