lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.sandbox.queries;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Objects;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermStates;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostAttribute;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.FuzzyTermsEnum;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;

 /**
  * Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms. In
  * effect this mixes the behaviour of FuzzyQuery and MoreLikeThis but with special consideration of
  * fuzzy scoring factors. This generally produces good results for queries where users may provide
  * details in a number of fields and have no knowledge of boolean query syntax and also want a
  * degree of fuzzy matching and a fast query.
  *
  * <p>For each source term the fuzzy variants are held in a BooleanQuery with no coord factor
  * (because we are not looking for matches on multiple variants in any one doc). Additionally, a
  * specialized TermQuery is used for variants and does not use that variant term's IDF because this
  * would favour rarer terms eg misspellings. Instead, all variants use the same IDF ranking (the one
  * for the source query term) and this is factored into the variant's boost. If the source query
  * term does not exist in the index the average IDF of the variants is used.
  */
 public class FuzzyLikeThisQuery extends Query {
   // TODO: generalize this query (at least it should not reuse this static sim!
   // a better way might be to convert this into multitermquery rewrite methods.
   // the rewrite method can 'average' the TermStates's term statistics (docfreq,totalTermFreq)
   // provided to TermQuery, so that the general idea is agnostic to any scoring system...
   static TFIDFSimilarity sim = new ClassicSimilarity();
   ArrayList<FieldVals> fieldVals = new ArrayList<>();
   Analyzer analyzer;

   int MAX_VARIANTS_PER_TERM = 50;
   boolean ignoreTF = false;
   private int maxNumTerms;

   @Override
   public int hashCode() {
     int prime = 31;
     int result = classHash();
     result = prime * result + Objects.hashCode(analyzer);
     result = prime * result + Objects.hashCode(fieldVals);
     result = prime * result + (ignoreTF ? 1231 : 1237);
     result = prime * result + maxNumTerms;
     return result;
   }

   @Override
   public boolean equals(Object other) {
     return sameClassAs(other) && equalsTo(getClass().cast(other));
   }

   private boolean equalsTo(FuzzyLikeThisQuery other) {
     return Objects.equals(analyzer, other.analyzer)
         && Objects.equals(fieldVals, other.fieldVals)
         && ignoreTF == other.ignoreTF
         && maxNumTerms == other.maxNumTerms;
   }

   /**
    * @param maxNumTerms The total number of terms clauses that will appear once rewritten as a
    *     BooleanQuery
    */
   public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer) {
     this.analyzer = analyzer;
     this.maxNumTerms = maxNumTerms;
   }

   static class FieldVals {
     String queryString;
     String fieldName;
     int maxEdits;
     int prefixLength;

     public FieldVals(String name, int maxEdits, int length, String queryString) {
       fieldName = name;
       this.maxEdits = maxEdits;
       prefixLength = length;
       this.queryString = queryString;
     }

     @Override
     public int hashCode() {
       final int prime = 31;
       int result = 1;
       result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
       result = prime * result + maxEdits;
       result = prime * result + prefixLength;
       result = prime * result + ((queryString == null) ? 0 : queryString.hashCode());
       return result;
     }

     @Override
     public boolean equals(Object obj) {
       if (this == obj) {
         return true;
       }
       if (obj == null) {
         return false;
       }
       if (getClass() != obj.getClass()) {
         return false;
       }
       FieldVals other = (FieldVals) obj;
       if (fieldName == null) {
         if (other.fieldName != null) {
           return false;
         }
       } else if (!fieldName.equals(other.fieldName)) return false;
       if (maxEdits != other.maxEdits) {
         return false;
       }
       if (prefixLength != other.prefixLength) {
         return false;
       }
       if (queryString == null) {
         if (other.queryString != null) {
           return false;
         }
       } else if (!queryString.equals(other.queryString)) {
         return false;
       }
       return true;
     }
   }

   /**
    * Adds user input for "fuzzification"
    *
    * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants
    *     will be parsed
    * @param minSimilarity The minimum similarity of the term variants; must be 0, 1 or 2 (see
    *     FuzzyTermsEnum)
    * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
    */
   public void addTerms(
       String queryString, String fieldName, float minSimilarity, int prefixLength) {
     int maxEdits = (int) minSimilarity;
     if (maxEdits != minSimilarity
         || maxEdits < 0
         || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
       throw new IllegalArgumentException(
           "minSimilarity must integer value between 0 and "
               + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE
               + ", inclusive; got "
               + minSimilarity);
     }
     fieldVals.add(new FieldVals(fieldName, maxEdits, prefixLength, queryString));
   }

   private void addTerms(IndexReader reader, FieldVals f, ScoreTermQueue q) throws IOException {
     if (f.queryString == null) return;
     final Terms terms = MultiTerms.getTerms(reader, f.fieldName);
     if (terms == null) {
       return;
     }
     try (TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString)) {
       CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

       int corpusNumDocs = reader.numDocs();
       HashSet<String> processedTerms = new HashSet<>();
       ts.reset();
       while (ts.incrementToken()) {
         String term = termAtt.toString();
         if (!processedTerms.contains(term)) {
           processedTerms.add(term);
           ScoreTermQueue variantsQ =
               new ScoreTermQueue(
                   MAX_VARIANTS_PER_TERM); // maxNum variants considered for any one term
           float minScore = 0;
           Term startTerm = new Term(f.fieldName, term);
           FuzzyTermsEnum fe =
               new FuzzyTermsEnum(terms, startTerm, f.maxEdits, f.prefixLength, true);
           // store the df so all variants use same idf
           int df = reader.docFreq(startTerm);
           int numVariants = 0;
           int totalVariantDocFreqs = 0;
           BytesRef possibleMatch;
           BoostAttribute boostAtt = fe.attributes().addAttribute(BoostAttribute.class);
           while ((possibleMatch = fe.next()) != null) {
             numVariants++;
             totalVariantDocFreqs += fe.docFreq();
             float score = boostAtt.getBoost();
             if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore) {
               ScoreTerm st =
                   new ScoreTerm(
                       new Term(startTerm.field(), BytesRef.deepCopyOf(possibleMatch)),
                       score,
                       startTerm);
               variantsQ.insertWithOverflow(st);
               minScore = variantsQ.top().score; // maintain minScore
             }
             fe.setMaxNonCompetitiveBoost(
                 variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
           }

           if (numVariants > 0) {
             int avgDf = totalVariantDocFreqs / numVariants;
             if (df == 0) // no direct match we can use as df for all variants
             {
               df = avgDf; // use avg df of all variants
             }

             // take the top variants (scored by edit distance) and reset the score
             // to include an IDF factor then add to the global queue for ranking
             // overall top query terms
             int size = variantsQ.size();
             for (int i = 0; i < size; i++) {
               ScoreTerm st = variantsQ.pop();
               st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs);
               q.insertWithOverflow(st);
             }
           }
         }
       }
       ts.end();
     }
   }

   private Query newTermQuery(IndexReader reader, Term term) throws IOException {
     if (ignoreTF) {
       return new ConstantScoreQuery(new TermQuery(term));
     } else {
       // we build an artificial TermStates that will give an overall df and ttf
       // equal to 1
       TermStates context = new TermStates(reader.getContext());
       for (LeafReaderContext leafContext : reader.leaves()) {
         Terms terms = leafContext.reader().terms(term.field());
         if (terms != null) {
           TermsEnum termsEnum = terms.iterator();
           if (termsEnum.seekExact(term.bytes())) {
             int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
             context.register(termsEnum.termState(), leafContext.ord, freq, freq);
           }
         }
       }
       return new TermQuery(term, context);
     }
   }

   @Override
   public void visit(QueryVisitor visitor) {
     visitor.visitLeaf(this);
   }

   @Override
   public Query rewrite(IndexReader reader) throws IOException {
     ScoreTermQueue q = new ScoreTermQueue(maxNumTerms);
     // load up the list of possible terms
     for (FieldVals f : fieldVals) {
       addTerms(reader, f, q);
     }

     BooleanQuery.Builder bq = new BooleanQuery.Builder();

     // create BooleanQueries to hold the variants for each token/field pair and ensure it
     // has no coord factor
     // Step 1: sort the termqueries by term/field
     HashMap<Term, ArrayList<ScoreTerm>> variantQueries = new HashMap<>();
     int size = q.size();
     for (int i = 0; i < size; i++) {
       ScoreTerm st = q.pop();
       ArrayList<ScoreTerm> l = variantQueries.get(st.fuzziedSourceTerm);
       if (l == null) {
         l = new ArrayList<>();
         variantQueries.put(st.fuzziedSourceTerm, l);
       }
       l.add(st);
     }
     // Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
     for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator();
         iter.hasNext(); ) {
       ArrayList<ScoreTerm> variants = iter.next();
       if (variants.size() == 1) {
         // optimize where only one selected variant
         ScoreTerm st = variants.get(0);
         Query tq = newTermQuery(reader, st.term);
         // set the boost to a mix of IDF and score
         bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD);
       } else {
         BooleanQuery.Builder termVariants = new BooleanQuery.Builder();
         for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2.hasNext(); ) {
           ScoreTerm st = iterator2.next();
           // found a match
           Query tq = newTermQuery(reader, st.term);
           // set the boost using the ScoreTerm's score
           termVariants.add(
               new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query
         }
         bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query
       }
     }
     // TODO possible alternative step 3 - organize above booleans into a new layer of field-based
     // booleans with a minimum-should-match of NumFields-1?
     return bq.build();
   }

   // Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best
   // term variants) then is reset with IDF for use in ranking against all other
   // terms/fields
   private static class ScoreTerm {
     public Term term;
     public float score;
     Term fuzziedSourceTerm;

     public ScoreTerm(Term term, float score, Term fuzziedSourceTerm) {
       this.term = term;
       this.score = score;
       this.fuzziedSourceTerm = fuzziedSourceTerm;
     }
   }

   private static class ScoreTermQueue extends PriorityQueue<ScoreTerm> {
     public ScoreTermQueue(int size) {
       super(size);
     }

     /* (non-Javadoc)
      * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
      */
     @Override
     protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) {
       if (termA.score == termB.score) {
         return termA.term.compareTo(termB.term) > 0;
       } else return termA.score < termB.score;
     }
   }

   /* (non-Javadoc)
    * @see org.apache.lucene.search.Query#toString(java.lang.String)
    */
   @Override
   public String toString(String field) {
     return null;
   }

   public boolean isIgnoreTF() {
     return ignoreTF;
   }

   public void setIgnoreTF(boolean ignoreTF) {
     this.ignoreTF = ignoreTF;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.sandbox.queries;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Iterator;
	import java.util.Objects;
	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.TokenStream;
	import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.index.MultiTerms;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.TermStates;
	import org.apache.lucene.index.Terms;
	import org.apache.lucene.index.TermsEnum;
	import org.apache.lucene.search.BooleanClause;
	import org.apache.lucene.search.BooleanQuery;
	import org.apache.lucene.search.BoostAttribute;
	import org.apache.lucene.search.BoostQuery;
	import org.apache.lucene.search.ConstantScoreQuery;
	import org.apache.lucene.search.FuzzyTermsEnum;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.QueryVisitor;
	import org.apache.lucene.search.TermQuery;
	import org.apache.lucene.search.similarities.ClassicSimilarity;
	import org.apache.lucene.search.similarities.TFIDFSimilarity;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.PriorityQueue;
	import org.apache.lucene.util.automaton.LevenshteinAutomata;

	/**
	* Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms. In
	* effect this mixes the behaviour of FuzzyQuery and MoreLikeThis but with special consideration of
	* fuzzy scoring factors. This generally produces good results for queries where users may provide
	* details in a number of fields and have no knowledge of boolean query syntax and also want a
	* degree of fuzzy matching and a fast query.
	*
	* <p>For each source term the fuzzy variants are held in a BooleanQuery with no coord factor
	* (because we are not looking for matches on multiple variants in any one doc). Additionally, a
	* specialized TermQuery is used for variants and does not use that variant term's IDF because this
	* would favour rarer terms eg misspellings. Instead, all variants use the same IDF ranking (the one
	* for the source query term) and this is factored into the variant's boost. If the source query
	* term does not exist in the index the average IDF of the variants is used.
	*/
	public class FuzzyLikeThisQuery extends Query {
	// TODO: generalize this query (at least it should not reuse this static sim!
	// a better way might be to convert this into multitermquery rewrite methods.
	// the rewrite method can 'average' the TermStates's term statistics (docfreq,totalTermFreq)
	// provided to TermQuery, so that the general idea is agnostic to any scoring system...
	static TFIDFSimilarity sim = new ClassicSimilarity();
	ArrayList<FieldVals> fieldVals = new ArrayList<>();
	Analyzer analyzer;

	int MAX_VARIANTS_PER_TERM = 50;
	boolean ignoreTF = false;
	private int maxNumTerms;

	@Override
	public int hashCode() {
	int prime = 31;
	int result = classHash();
	result = prime * result + Objects.hashCode(analyzer);
	result = prime * result + Objects.hashCode(fieldVals);
	result = prime * result + (ignoreTF ? 1231 : 1237);
	result = prime * result + maxNumTerms;
	return result;
	}

	@Override
	public boolean equals(Object other) {
	return sameClassAs(other) && equalsTo(getClass().cast(other));
	}

	private boolean equalsTo(FuzzyLikeThisQuery other) {
	return Objects.equals(analyzer, other.analyzer)
	&& Objects.equals(fieldVals, other.fieldVals)
	&& ignoreTF == other.ignoreTF
	&& maxNumTerms == other.maxNumTerms;
	}

	/**
	* @param maxNumTerms The total number of terms clauses that will appear once rewritten as a
	* BooleanQuery
	*/
	public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer) {
	this.analyzer = analyzer;
	this.maxNumTerms = maxNumTerms;
	}

	static class FieldVals {
	String queryString;
	String fieldName;
	int maxEdits;
	int prefixLength;

	public FieldVals(String name, int maxEdits, int length, String queryString) {
	fieldName = name;
	this.maxEdits = maxEdits;
	prefixLength = length;
	this.queryString = queryString;
	}

	@Override
	public int hashCode() {
	final int prime = 31;
	int result = 1;
	result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
	result = prime * result + maxEdits;
	result = prime * result + prefixLength;
	result = prime * result + ((queryString == null) ? 0 : queryString.hashCode());
	return result;
	}

	@Override
	public boolean equals(Object obj) {
	if (this == obj) {
	return true;
	}
	if (obj == null) {
	return false;
	}
	if (getClass() != obj.getClass()) {
	return false;
	}
	FieldVals other = (FieldVals) obj;
	if (fieldName == null) {
	if (other.fieldName != null) {
	return false;
	}
	} else if (!fieldName.equals(other.fieldName)) return false;
	if (maxEdits != other.maxEdits) {
	return false;
	}
	if (prefixLength != other.prefixLength) {
	return false;
	}
	if (queryString == null) {
	if (other.queryString != null) {
	return false;
	}
	} else if (!queryString.equals(other.queryString)) {
	return false;
	}
	return true;
	}
	}

	/**
	* Adds user input for "fuzzification"
	*
	* @param queryString The string which will be parsed by the analyzer and for which fuzzy variants
	* will be parsed
	* @param minSimilarity The minimum similarity of the term variants; must be 0, 1 or 2 (see
	* FuzzyTermsEnum)
	* @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
	*/
	public void addTerms(
	String queryString, String fieldName, float minSimilarity, int prefixLength) {
	int maxEdits = (int) minSimilarity;
	if (maxEdits != minSimilarity
	\|\| maxEdits < 0
	\|\| maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
	throw new IllegalArgumentException(
	"minSimilarity must integer value between 0 and "
	+ LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE
	+ ", inclusive; got "
	+ minSimilarity);
	}
	fieldVals.add(new FieldVals(fieldName, maxEdits, prefixLength, queryString));
	}

	private void addTerms(IndexReader reader, FieldVals f, ScoreTermQueue q) throws IOException {
	if (f.queryString == null) return;
	final Terms terms = MultiTerms.getTerms(reader, f.fieldName);
	if (terms == null) {
	return;
	}
	try (TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString)) {
	CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

	int corpusNumDocs = reader.numDocs();
	HashSet<String> processedTerms = new HashSet<>();
	ts.reset();
	while (ts.incrementToken()) {
	String term = termAtt.toString();
	if (!processedTerms.contains(term)) {
	processedTerms.add(term);
	ScoreTermQueue variantsQ =
	new ScoreTermQueue(
	MAX_VARIANTS_PER_TERM); // maxNum variants considered for any one term
	float minScore = 0;
	Term startTerm = new Term(f.fieldName, term);
	FuzzyTermsEnum fe =
	new FuzzyTermsEnum(terms, startTerm, f.maxEdits, f.prefixLength, true);
	// store the df so all variants use same idf
	int df = reader.docFreq(startTerm);
	int numVariants = 0;
	int totalVariantDocFreqs = 0;
	BytesRef possibleMatch;
	BoostAttribute boostAtt = fe.attributes().addAttribute(BoostAttribute.class);
	while ((possibleMatch = fe.next()) != null) {
	numVariants++;
	totalVariantDocFreqs += fe.docFreq();
	float score = boostAtt.getBoost();
	if (variantsQ.size() < MAX_VARIANTS_PER_TERM \|\| score > minScore) {
	ScoreTerm st =
	new ScoreTerm(
	new Term(startTerm.field(), BytesRef.deepCopyOf(possibleMatch)),
	score,
	startTerm);
	variantsQ.insertWithOverflow(st);
	minScore = variantsQ.top().score; // maintain minScore
	}
	fe.setMaxNonCompetitiveBoost(
	variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
	}

	if (numVariants > 0) {
	int avgDf = totalVariantDocFreqs / numVariants;
	if (df == 0) // no direct match we can use as df for all variants
	{
	df = avgDf; // use avg df of all variants
	}

	// take the top variants (scored by edit distance) and reset the score
	// to include an IDF factor then add to the global queue for ranking
	// overall top query terms
	int size = variantsQ.size();
	for (int i = 0; i < size; i++) {
	ScoreTerm st = variantsQ.pop();
	st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs);
	q.insertWithOverflow(st);
	}
	}
	}
	}
	ts.end();
	}
	}

	private Query newTermQuery(IndexReader reader, Term term) throws IOException {
	if (ignoreTF) {
	return new ConstantScoreQuery(new TermQuery(term));
	} else {
	// we build an artificial TermStates that will give an overall df and ttf
	// equal to 1
	TermStates context = new TermStates(reader.getContext());
	for (LeafReaderContext leafContext : reader.leaves()) {
	Terms terms = leafContext.reader().terms(term.field());
	if (terms != null) {
	TermsEnum termsEnum = terms.iterator();
	if (termsEnum.seekExact(term.bytes())) {
	int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
	context.register(termsEnum.termState(), leafContext.ord, freq, freq);
	}
	}
	}
	return new TermQuery(term, context);
	}
	}

	@Override
	public void visit(QueryVisitor visitor) {
	visitor.visitLeaf(this);
	}

	@Override
	public Query rewrite(IndexReader reader) throws IOException {
	ScoreTermQueue q = new ScoreTermQueue(maxNumTerms);
	// load up the list of possible terms
	for (FieldVals f : fieldVals) {
	addTerms(reader, f, q);
	}

	BooleanQuery.Builder bq = new BooleanQuery.Builder();

	// create BooleanQueries to hold the variants for each token/field pair and ensure it
	// has no coord factor
	// Step 1: sort the termqueries by term/field
	HashMap<Term, ArrayList<ScoreTerm>> variantQueries = new HashMap<>();
	int size = q.size();
	for (int i = 0; i < size; i++) {
	ScoreTerm st = q.pop();
	ArrayList<ScoreTerm> l = variantQueries.get(st.fuzziedSourceTerm);
	if (l == null) {
	l = new ArrayList<>();
	variantQueries.put(st.fuzziedSourceTerm, l);
	}
	l.add(st);
	}
	// Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
	for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator();
	iter.hasNext(); ) {
	ArrayList<ScoreTerm> variants = iter.next();
	if (variants.size() == 1) {
	// optimize where only one selected variant
	ScoreTerm st = variants.get(0);
	Query tq = newTermQuery(reader, st.term);
	// set the boost to a mix of IDF and score
	bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD);
	} else {
	BooleanQuery.Builder termVariants = new BooleanQuery.Builder();
	for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2.hasNext(); ) {
	ScoreTerm st = iterator2.next();
	// found a match
	Query tq = newTermQuery(reader, st.term);
	// set the boost using the ScoreTerm's score
	termVariants.add(
	new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query
	}
	bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query
	}
	}
	// TODO possible alternative step 3 - organize above booleans into a new layer of field-based
	// booleans with a minimum-should-match of NumFields-1?
	return bq.build();
	}

	// Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best
	// term variants) then is reset with IDF for use in ranking against all other
	// terms/fields
	private static class ScoreTerm {
	public Term term;
	public float score;
	Term fuzziedSourceTerm;

	public ScoreTerm(Term term, float score, Term fuzziedSourceTerm) {
	this.term = term;
	this.score = score;
	this.fuzziedSourceTerm = fuzziedSourceTerm;
	}
	}

	private static class ScoreTermQueue extends PriorityQueue<ScoreTerm> {
	public ScoreTermQueue(int size) {
	super(size);
	}

	/* (non-Javadoc)
	* @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
	*/
	@Override
	protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) {
	if (termA.score == termB.score) {
	return termA.term.compareTo(termB.term) > 0;
	} else return termA.score < termB.score;
	}
	}

	/* (non-Javadoc)
	* @see org.apache.lucene.search.Query#toString(java.lang.String)
	*/
	@Override
	public String toString(String field) {
	return null;
	}

	public boolean isIgnoreTF() {
	return ignoreTF;
	}

	public void setIgnoreTF(boolean ignoreTF) {
	this.ignoreTF = ignoreTF;
	}
	}