src/java/org/apache/lucene/search/PhrasePrefixQuery.java - lucene-solr - Git at Google

 package org.apache.lucene.search;

 /**
  * Copyright 2004 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Vector;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultipleTermPositions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermPositions;
 import org.apache.lucene.search.Query;

 /**
  * PhrasePrefixQuery is a generalized version of PhraseQuery, with an added
  * method {@link #add(Term[])} that lets several alternative terms occupy a
  * single position of the phrase.
  * <p>
  * For example, to search for the phrase "Microsoft app*", first call
  * {@link #add(Term)} with the term "Microsoft", then find all terms that have
  * "app" as a prefix using IndexReader.terms(Term), and pass them to
  * {@link #add(Term[])}.
  *
  * @author Anders Nielsen
  * @version 1.0
  */
 public class PhrasePrefixQuery extends Query {
   /** Field shared by every term of the phrase; null until the first add. */
   private String field;
   /** One Term[] per phrase position, in the order they were added. */
   private ArrayList termArrays = new ArrayList();
   /** Relative position (boxed as Integer) of each entry in termArrays. */
   private Vector positions = new Vector();

   private int slop = 0;

   /** Sets the phrase slop for this query.
    * @see PhraseQuery#setSlop(int)
    */
   public void setSlop(int s) { slop = s; }

   /** Gets the phrase slop for this query.
    * @see PhraseQuery#getSlop()
    */
   public int getSlop() { return slop; }

   /** Add a single term at the next position in the phrase.
    * @see PhraseQuery#add(Term)
    */
   public void add(Term term) { add(new Term[]{term}); }

   /** Add multiple terms at the next position in the phrase.  Any of the terms
    * may match.  The position used is one past the most recently added
    * position, or 0 if nothing has been added yet.
    *
    * @see PhraseQuery#add(Term)
    */
   public void add(Term[] terms) {
     int position = 0;
     if (positions.size() > 0) {
       position = ((Integer) positions.lastElement()).intValue() + 1;
     }

     add(terms, position);
   }

   /**
    * Adds a set of alternative terms at an explicit relative position within
    * the phrase.
    *
    * @see PhraseQuery#add(Term, int)
    * @param terms the alternative terms for this position; must contain at
    *        least one term, and all terms must share the field of the terms
    *        added previously
    * @param position the relative position of these terms in the phrase
    * @throws IllegalArgumentException if <code>terms</code> is null or empty,
    *         or if a term belongs to a different field
    */
   public void add(Term[] terms, int position) {
     // An empty position can never be scored or printed (both read terms[0]),
     // so reject it here instead of failing later with an index error.
     if (terms == null || terms.length == 0) {
       throw new IllegalArgumentException(
           "At least one term is required for each phrase position");
     }

     if (termArrays.size() == 0) {
       field = terms[0].field();
     }

     for (int i = 0; i < terms.length; i++) {
       // Compare by value rather than by reference so the check does not
       // depend on field names being interned.
       String termField = terms[i].field();
       boolean sameField =
           (field == null) ? termField == null : field.equals(termField);
       if (!sameField) {
         throw new IllegalArgumentException(
             "All phrase terms must be in the same field (" + field + "): "
                 + terms[i]);
       }
     }

     termArrays.add(terms);
     positions.addElement(new Integer(position));
   }

   /**
    * Returns the relative positions of terms in this phrase, one entry per
    * call to add, in insertion order.
    */
   public int[] getPositions() {
     int[] result = new int[positions.size()];
     for (int i = 0; i < result.length; i++) {
       result[i] = ((Integer) positions.elementAt(i)).intValue();
     }
     return result;
   }

   /** Weight implementation used when the phrase does not have exactly one position. */
   private class PhrasePrefixWeight implements Weight {
     private Searcher searcher;
     private float value;
     private float idf;
     private float queryNorm;
     private float queryWeight;

     public PhrasePrefixWeight(Searcher searcher) {
       this.searcher = searcher;
     }

     public Query getQuery() { return PhrasePrefixQuery.this; }
     public float getValue() { return value; }

     /**
      * Sums the idf of every term at every position, stores it as this
      * weight's idf, and returns the squared (idf * boost) query weight.
      */
     public float sumOfSquaredWeights() throws IOException {
       // Accumulate into a local so that calling this method more than once
       // recomputes idf instead of adding to the previous total.
       float idfSum = 0.0f;
       for (Iterator it = termArrays.iterator(); it.hasNext();) {
         Term[] terms = (Term[]) it.next();
         for (int j = 0; j < terms.length; j++) {
           idfSum += getSimilarity(searcher).idf(terms[j], searcher);
         }
       }
       idf = idfSum;

       queryWeight = idf * getBoost();             // compute query weight
       return queryWeight * queryWeight;           // square it
     }

     public void normalize(float queryNorm) {
       this.queryNorm = queryNorm;
       queryWeight *= queryNorm;                   // normalize query weight
       value = queryWeight * idf;                  // idf for document
     }

     /**
      * Builds a phrase scorer over one TermPositions per phrase position.
      *
      * @return an exact scorer when slop is 0, a sloppy scorer otherwise, or
      *         null when the phrase is empty or a position has no
      *         TermPositions
      */
     public Scorer scorer(IndexReader reader) throws IOException {
       if (termArrays.size() == 0) {                // optimize zero-term case
         return null;
       }

       TermPositions[] tps = new TermPositions[termArrays.size()];
       for (int i = 0; i < tps.length; i++) {
         Term[] terms = (Term[]) termArrays.get(i);

         // Several alternatives at one position are merged into one stream.
         TermPositions p;
         if (terms.length > 1) {
           p = new MultipleTermPositions(reader, terms);
         } else {
           p = reader.termPositions(terms[0]);
         }

         if (p == null) {
           return null;
         }

         tps[i] = p;
       }

       if (slop == 0) {
         return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
                                      reader.norms(field));
       }
       return new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
                                     slop, reader.norms(field));
     }

     /**
      * Explains the score of <code>doc</code> as the product of a query weight
      * (boost * idf * queryNorm) and a field weight (tf * idf * fieldNorm).
      * When the query weight is exactly 1.0 only the field weight explanation
      * is returned.
      */
     public Explanation explain(IndexReader reader, int doc)
       throws IOException {
       Explanation result = new Explanation();
       result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");

       Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");

       // explain query weight
       Explanation queryExpl = new Explanation();
       queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

       Explanation boostExpl = new Explanation(getBoost(), "boost");
       if (getBoost() != 1.0f) {
         queryExpl.addDetail(boostExpl);
       }

       queryExpl.addDetail(idfExpl);

       Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
       queryExpl.addDetail(queryNormExpl);

       queryExpl.setValue(boostExpl.getValue() *
                          idfExpl.getValue() *
                          queryNormExpl.getValue());

       result.addDetail(queryExpl);

       // explain field weight
       Explanation fieldExpl = new Explanation();
       fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
                                "), product of:");

       // scorer() returns null when nothing can match; report a zero tf
       // instead of dereferencing the missing scorer.
       Scorer phraseScorer = scorer(reader);
       Explanation tfExpl;
       if (phraseScorer == null) {
         tfExpl = new Explanation(0.0f, "no matching term positions");
       } else {
         tfExpl = phraseScorer.explain(doc);
       }
       fieldExpl.addDetail(tfExpl);
       fieldExpl.addDetail(idfExpl);

       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
         fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

       fieldExpl.setValue(tfExpl.getValue() *
                          idfExpl.getValue() *
                          fieldNormExpl.getValue());

       result.addDetail(fieldExpl);

       // combine them
       result.setValue(queryExpl.getValue() * fieldExpl.getValue());

       if (queryExpl.getValue() == 1.0f) {
         return fieldExpl;
       }

       return result;
     }
   }

   /**
    * Creates the weight for this query.  A phrase with exactly one position
    * is delegated to a BooleanQuery of optional TermQuery clauses carrying
    * this query's boost; every other case uses PhrasePrefixWeight.
    */
   protected Weight createWeight(Searcher searcher) {
     if (termArrays.size() == 1) {                 // optimize one-term case
       Term[] terms = (Term[]) termArrays.get(0);
       BooleanQuery boq = new BooleanQuery();
       for (int i = 0; i < terms.length; i++) {
         boq.add(new TermQuery(terms[i]), false, false);
       }
       boq.setBoost(getBoost());
       return boq.createWeight(searcher);
     }
     return new PhrasePrefixWeight(searcher);
   }

   /**
    * Prints a user-readable version of this query.  Each position is shown
    * by its first term, followed by "*" when the position has alternatives.
    *
    * @param f the default field; the "field:" prefix is omitted when this
    *        query's field equals it or when no term has been added yet
    */
   public final String toString(String f) {
     StringBuffer buffer = new StringBuffer();
     // field stays null until the first add; guard so an empty query prints
     // instead of throwing a NullPointerException.
     if (field != null && !field.equals(f)) {
       buffer.append(field);
       buffer.append(":");
     }

     buffer.append("\"");
     Iterator i = termArrays.iterator();
     while (i.hasNext()) {
       Term[] terms = (Term[]) i.next();
       buffer.append(terms[0].text() + (terms.length > 1 ? "*" : ""));
       if (i.hasNext()) {
         buffer.append(" ");
       }
     }
     buffer.append("\"");

     if (slop != 0) {
       buffer.append("~");
       buffer.append(slop);
     }

     if (getBoost() != 1.0f) {
       buffer.append("^");
       buffer.append(Float.toString(getBoost()));
     }

     return buffer.toString();
   }
 }
	package org.apache.lucene.search;

	/**
	* Copyright 2004 The Apache Software Foundation
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Iterator;
	import java.util.Vector;

	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.MultipleTermPositions;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.TermPositions;
	import org.apache.lucene.search.Query;

	/**
	 * PhrasePrefixQuery is a generalized version of PhraseQuery, with an added
	 * method {@link #add(Term[])} that lets several alternative terms occupy a
	 * single position of the phrase.
	 * <p>
	 * For example, to search for the phrase "Microsoft app*", first call
	 * {@link #add(Term)} with the term "Microsoft", then find all terms that have
	 * "app" as a prefix using IndexReader.terms(Term), and pass them to
	 * {@link #add(Term[])}.
	 *
	 * @author Anders Nielsen
	 * @version 1.0
	 */
	public class PhrasePrefixQuery extends Query {
	  /** Field shared by every term of the phrase; null until the first add. */
	  private String field;
	  /** One Term[] per phrase position, in the order they were added. */
	  private ArrayList termArrays = new ArrayList();
	  /** Relative position (boxed as Integer) of each entry in termArrays. */
	  private Vector positions = new Vector();

	  private int slop = 0;

	  /** Sets the phrase slop for this query.
	   * @see PhraseQuery#setSlop(int)
	   */
	  public void setSlop(int s) { slop = s; }

	  /** Gets the phrase slop for this query.
	   * @see PhraseQuery#getSlop()
	   */
	  public int getSlop() { return slop; }

	  /** Add a single term at the next position in the phrase.
	   * @see PhraseQuery#add(Term)
	   */
	  public void add(Term term) { add(new Term[]{term}); }

	  /** Add multiple terms at the next position in the phrase. Any of the terms
	   * may match. The position used is one past the most recently added
	   * position, or 0 if nothing has been added yet.
	   *
	   * @see PhraseQuery#add(Term)
	   */
	  public void add(Term[] terms) {
	    int position = 0;
	    if (positions.size() > 0) {
	      position = ((Integer) positions.lastElement()).intValue() + 1;
	    }

	    add(terms, position);
	  }

	  /**
	   * Adds a set of alternative terms at an explicit relative position within
	   * the phrase.
	   *
	   * @see PhraseQuery#add(Term, int)
	   * @param terms the alternative terms for this position; must contain at
	   *        least one term, and all terms must share the field of the terms
	   *        added previously
	   * @param position the relative position of these terms in the phrase
	   * @throws IllegalArgumentException if <code>terms</code> is null or empty,
	   *         or if a term belongs to a different field
	   */
	  public void add(Term[] terms, int position) {
	    // An empty position can never be scored or printed (both read terms[0]),
	    // so reject it here instead of failing later with an index error.
	    if (terms == null || terms.length == 0) {
	      throw new IllegalArgumentException(
	          "At least one term is required for each phrase position");
	    }

	    if (termArrays.size() == 0) {
	      field = terms[0].field();
	    }

	    for (int i = 0; i < terms.length; i++) {
	      // Compare by value rather than by reference so the check does not
	      // depend on field names being interned.
	      String termField = terms[i].field();
	      boolean sameField =
	          (field == null) ? termField == null : field.equals(termField);
	      if (!sameField) {
	        throw new IllegalArgumentException(
	            "All phrase terms must be in the same field (" + field + "): "
	                + terms[i]);
	      }
	    }

	    termArrays.add(terms);
	    positions.addElement(new Integer(position));
	  }

	  /**
	   * Returns the relative positions of terms in this phrase, one entry per
	   * call to add, in insertion order.
	   */
	  public int[] getPositions() {
	    int[] result = new int[positions.size()];
	    for (int i = 0; i < result.length; i++) {
	      result[i] = ((Integer) positions.elementAt(i)).intValue();
	    }
	    return result;
	  }

	  /** Weight implementation used when the phrase does not have exactly one position. */
	  private class PhrasePrefixWeight implements Weight {
	    private Searcher searcher;
	    private float value;
	    private float idf;
	    private float queryNorm;
	    private float queryWeight;

	    public PhrasePrefixWeight(Searcher searcher) {
	      this.searcher = searcher;
	    }

	    public Query getQuery() { return PhrasePrefixQuery.this; }
	    public float getValue() { return value; }

	    /**
	     * Sums the idf of every term at every position, stores it as this
	     * weight's idf, and returns the squared (idf * boost) query weight.
	     */
	    public float sumOfSquaredWeights() throws IOException {
	      // Accumulate into a local so that calling this method more than once
	      // recomputes idf instead of adding to the previous total.
	      float idfSum = 0.0f;
	      for (Iterator it = termArrays.iterator(); it.hasNext();) {
	        Term[] terms = (Term[]) it.next();
	        for (int j = 0; j < terms.length; j++) {
	          idfSum += getSimilarity(searcher).idf(terms[j], searcher);
	        }
	      }
	      idf = idfSum;

	      queryWeight = idf * getBoost(); // compute query weight
	      return queryWeight * queryWeight; // square it
	    }

	    public void normalize(float queryNorm) {
	      this.queryNorm = queryNorm;
	      queryWeight *= queryNorm; // normalize query weight
	      value = queryWeight * idf; // idf for document
	    }

	    /**
	     * Builds a phrase scorer over one TermPositions per phrase position.
	     *
	     * @return an exact scorer when slop is 0, a sloppy scorer otherwise, or
	     *         null when the phrase is empty or a position has no
	     *         TermPositions
	     */
	    public Scorer scorer(IndexReader reader) throws IOException {
	      if (termArrays.size() == 0) { // optimize zero-term case
	        return null;
	      }

	      TermPositions[] tps = new TermPositions[termArrays.size()];
	      for (int i = 0; i < tps.length; i++) {
	        Term[] terms = (Term[]) termArrays.get(i);

	        // Several alternatives at one position are merged into one stream.
	        TermPositions p;
	        if (terms.length > 1) {
	          p = new MultipleTermPositions(reader, terms);
	        } else {
	          p = reader.termPositions(terms[0]);
	        }

	        if (p == null) {
	          return null;
	        }

	        tps[i] = p;
	      }

	      if (slop == 0) {
	        return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
	                                     reader.norms(field));
	      }
	      return new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
	                                    slop, reader.norms(field));
	    }

	    /**
	     * Explains the score of <code>doc</code> as the product of a query weight
	     * (boost * idf * queryNorm) and a field weight (tf * idf * fieldNorm).
	     * When the query weight is exactly 1.0 only the field weight explanation
	     * is returned.
	     */
	    public Explanation explain(IndexReader reader, int doc)
	      throws IOException {
	      Explanation result = new Explanation();
	      result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");

	      Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");

	      // explain query weight
	      Explanation queryExpl = new Explanation();
	      queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

	      Explanation boostExpl = new Explanation(getBoost(), "boost");
	      if (getBoost() != 1.0f) {
	        queryExpl.addDetail(boostExpl);
	      }

	      queryExpl.addDetail(idfExpl);

	      Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
	      queryExpl.addDetail(queryNormExpl);

	      queryExpl.setValue(boostExpl.getValue() *
	                         idfExpl.getValue() *
	                         queryNormExpl.getValue());

	      result.addDetail(queryExpl);

	      // explain field weight
	      Explanation fieldExpl = new Explanation();
	      fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
	                               "), product of:");

	      // scorer() returns null when nothing can match; report a zero tf
	      // instead of dereferencing the missing scorer.
	      Scorer phraseScorer = scorer(reader);
	      Explanation tfExpl;
	      if (phraseScorer == null) {
	        tfExpl = new Explanation(0.0f, "no matching term positions");
	      } else {
	        tfExpl = phraseScorer.explain(doc);
	      }
	      fieldExpl.addDetail(tfExpl);
	      fieldExpl.addDetail(idfExpl);

	      Explanation fieldNormExpl = new Explanation();
	      byte[] fieldNorms = reader.norms(field);
	      float fieldNorm =
	        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
	      fieldNormExpl.setValue(fieldNorm);
	      fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
	      fieldExpl.addDetail(fieldNormExpl);

	      fieldExpl.setValue(tfExpl.getValue() *
	                         idfExpl.getValue() *
	                         fieldNormExpl.getValue());

	      result.addDetail(fieldExpl);

	      // combine them
	      result.setValue(queryExpl.getValue() * fieldExpl.getValue());

	      if (queryExpl.getValue() == 1.0f) {
	        return fieldExpl;
	      }

	      return result;
	    }
	  }

	  /**
	   * Creates the weight for this query. A phrase with exactly one position
	   * is delegated to a BooleanQuery of optional TermQuery clauses carrying
	   * this query's boost; every other case uses PhrasePrefixWeight.
	   */
	  protected Weight createWeight(Searcher searcher) {
	    if (termArrays.size() == 1) { // optimize one-term case
	      Term[] terms = (Term[]) termArrays.get(0);
	      BooleanQuery boq = new BooleanQuery();
	      for (int i = 0; i < terms.length; i++) {
	        boq.add(new TermQuery(terms[i]), false, false);
	      }
	      boq.setBoost(getBoost());
	      return boq.createWeight(searcher);
	    }
	    return new PhrasePrefixWeight(searcher);
	  }

	  /**
	   * Prints a user-readable version of this query. Each position is shown
	   * by its first term, followed by "*" when the position has alternatives.
	   *
	   * @param f the default field; the "field:" prefix is omitted when this
	   *        query's field equals it or when no term has been added yet
	   */
	  public final String toString(String f) {
	    StringBuffer buffer = new StringBuffer();
	    // field stays null until the first add; guard so an empty query prints
	    // instead of throwing a NullPointerException.
	    if (field != null && !field.equals(f)) {
	      buffer.append(field);
	      buffer.append(":");
	    }

	    buffer.append("\"");
	    Iterator i = termArrays.iterator();
	    while (i.hasNext()) {
	      Term[] terms = (Term[]) i.next();
	      buffer.append(terms[0].text() + (terms.length > 1 ? "*" : ""));
	      if (i.hasNext()) {
	        buffer.append(" ");
	      }
	    }
	    buffer.append("\"");

	    if (slop != 0) {
	      buffer.append("~");
	      buffer.append(slop);
	    }

	    if (getBoost() != 1.0f) {
	      buffer.append("^");
	      buffer.append(Float.toString(getBoost()));
	    }

	    return buffer.toString();
	  }
	}