/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsReader;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
/**
* A Query that matches documents containing a particular sequence of terms. A PhraseQuery is built
* by QueryParser for input like <code>"new york"</code>.
*
* <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
*
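 * <p>A minimal sketch of such a combination (the field and term values here are illustrative
 * only):
 *
 * <pre class="prettyprint">
 * Query q = new BooleanQuery.Builder()
 *     .add(new PhraseQuery("body", "new", "york"), BooleanClause.Occur.MUST)
 *     .add(new TermQuery(new Term("body", "restaurants")), BooleanClause.Occur.SHOULD)
 *     .build();
 * </pre>
 *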
* <p><b>NOTE</b>: All terms in the phrase must match, even those at the same position. If you have
* terms at the same position, perhaps synonyms, you probably want {@link MultiPhraseQuery} instead
 * which only requires one term at each position to match. <br>
 * Also, leading holes don't have any particular meaning for this query and will be ignored. For
 * instance, this query:
*
* <pre class="prettyprint">
* PhraseQuery.Builder builder = new PhraseQuery.Builder();
* builder.add(new Term("body", "one"), 4);
* builder.add(new Term("body", "two"), 5);
* PhraseQuery pq = builder.build();
* </pre>
*
 * is equivalent to the query below:
*
* <pre class="prettyprint">
* PhraseQuery.Builder builder = new PhraseQuery.Builder();
* builder.add(new Term("body", "one"), 0);
* builder.add(new Term("body", "two"), 1);
* PhraseQuery pq = builder.build();
* </pre>
*/
public class PhraseQuery extends Query {
/** A builder for phrase queries. */
public static class Builder {
private int slop;
private final List<Term> terms;
private final List<Integer> positions;
/** Sole constructor. */
public Builder() {
slop = 0;
terms = new ArrayList<>();
positions = new ArrayList<>();
}
/**
* Set the slop.
*
* @see PhraseQuery#getSlop()
*/
public Builder setSlop(int slop) {
this.slop = slop;
return this;
}
/**
* Adds a term to the end of the query phrase. The relative position of the term is the one
* immediately after the last term added.
*/
public Builder add(Term term) {
return add(term, positions.isEmpty() ? 0 : 1 + positions.get(positions.size() - 1));
}
/**
* Adds a term to the end of the query phrase. The relative position of the term within the
* phrase is specified explicitly, but must be greater than or equal to that of the previously
* added term. A greater position allows phrases with gaps (e.g. in connection with stopwords).
* If the position is equal, you most likely should be using {@link MultiPhraseQuery} instead
* which only requires one term at each position to match; this class requires all of them.
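     *
     * <p>For instance, a sketch of a phrase with a one-position gap (say, where a stopword was
     * removed at position 1; the field name {@code body} is illustrative):
     *
     * <pre class="prettyprint">
     * PhraseQuery.Builder builder = new PhraseQuery.Builder();
     * builder.add(new Term("body", "quick"), 0);
     * builder.add(new Term("body", "fox"), 2);
     * PhraseQuery pq = builder.build();
     * </pre>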
*/
public Builder add(Term term, int position) {
Objects.requireNonNull(term, "Cannot add a null term to PhraseQuery");
if (position < 0) {
throw new IllegalArgumentException("Positions must be >= 0, got " + position);
}
if (positions.isEmpty() == false) {
final int lastPosition = positions.get(positions.size() - 1);
if (position < lastPosition) {
throw new IllegalArgumentException(
"Positions must be added in order, got " + position + " after " + lastPosition);
}
}
if (terms.isEmpty() == false && term.field().equals(terms.get(0).field()) == false) {
throw new IllegalArgumentException(
"All terms must be on the same field, got "
+ term.field()
+ " and "
+ terms.get(0).field());
}
terms.add(term);
positions.add(position);
return this;
}
/** Build a phrase query based on the terms that have been added. */
public PhraseQuery build() {
Term[] terms = this.terms.toArray(new Term[this.terms.size()]);
int[] positions = new int[this.positions.size()];
for (int i = 0; i < positions.length; ++i) {
positions[i] = this.positions.get(i);
}
return new PhraseQuery(slop, terms, positions);
}
}
private final int slop;
private final String field;
private final Term[] terms;
private final int[] positions;
private PhraseQuery(int slop, Term[] terms, int[] positions) {
if (terms.length != positions.length) {
throw new IllegalArgumentException("Must have as many terms as positions");
}
if (slop < 0) {
throw new IllegalArgumentException("Slop must be >= 0, got " + slop);
}
for (Term term : terms) {
Objects.requireNonNull(term, "Cannot add a null term to PhraseQuery");
}
for (int i = 1; i < terms.length; ++i) {
if (terms[i - 1].field().equals(terms[i].field()) == false) {
throw new IllegalArgumentException("All terms should have the same field");
}
}
for (int position : positions) {
if (position < 0) {
throw new IllegalArgumentException("Positions must be >= 0, got " + position);
}
}
for (int i = 1; i < positions.length; ++i) {
if (positions[i] < positions[i - 1]) {
throw new IllegalArgumentException(
"Positions should not go backwards, got "
+ positions[i - 1]
+ " before "
+ positions[i]);
}
}
this.slop = slop;
this.terms = terms;
this.positions = positions;
this.field = terms.length == 0 ? null : terms[0].field();
}
private static int[] incrementalPositions(int length) {
int[] positions = new int[length];
for (int i = 0; i < length; ++i) {
positions[i] = i;
}
return positions;
}
private static Term[] toTerms(String field, String... termStrings) {
Term[] terms = new Term[termStrings.length];
for (int i = 0; i < terms.length; ++i) {
Objects.requireNonNull(termStrings[i], "Cannot add a null term to PhraseQuery");
terms[i] = new Term(field, termStrings[i]);
}
return terms;
}
private static Term[] toTerms(String field, BytesRef... termBytes) {
Term[] terms = new Term[termBytes.length];
for (int i = 0; i < terms.length; ++i) {
Objects.requireNonNull(termBytes[i], "Cannot add a null term to PhraseQuery");
terms[i] = new Term(field, termBytes[i]);
}
return terms;
}
/**
* Create a phrase query which will match documents that contain the given list of terms at
* consecutive positions in {@code field}, and at a maximum edit distance of {@code slop}. For
* more complicated use-cases, use {@link PhraseQuery.Builder}.
*
* @see #getSlop()
*/
public PhraseQuery(int slop, String field, String... terms) {
this(slop, toTerms(field, terms), incrementalPositions(terms.length));
}
/**
* Create a phrase query which will match documents that contain the given list of terms at
* consecutive positions in {@code field}.
*/
public PhraseQuery(String field, String... terms) {
this(0, field, terms);
}
/**
* Create a phrase query which will match documents that contain the given list of terms at
* consecutive positions in {@code field}, and at a maximum edit distance of {@code slop}. For
* more complicated use-cases, use {@link PhraseQuery.Builder}.
*
* @see #getSlop()
*/
public PhraseQuery(int slop, String field, BytesRef... terms) {
this(slop, toTerms(field, terms), incrementalPositions(terms.length));
}
/**
* Create a phrase query which will match documents that contain the given list of terms at
* consecutive positions in {@code field}.
*/
public PhraseQuery(String field, BytesRef... terms) {
this(0, field, terms);
}
/**
* Return the slop for this {@link PhraseQuery}.
*
* <p>The slop is an edit distance between respective positions of terms as defined in this {@link
* PhraseQuery} and the positions of terms in a document.
*
* <p>For instance, when searching for {@code "quick fox"}, it is expected that the difference
* between the positions of {@code fox} and {@code quick} is 1. So {@code "a quick brown fox"}
* would be at an edit distance of 1 since the difference of the positions of {@code fox} and
* {@code quick} is 2. Similarly, {@code "the fox is quick"} would be at an edit distance of 3
* since the difference of the positions of {@code fox} and {@code quick} is -2. The slop defines
* the maximum edit distance for a document to match.
*
* <p>More exact matches are scored higher than sloppier matches, thus search results are sorted
* by exactness.
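   *
   * <p>For instance, a sketch using an illustrative field name {@code body}: the query below
   * matches a document whose field contains {@code "a quick brown fox"} (edit distance 1), but
   * not one that only contains {@code "the fox is quick"} (edit distance 3):
   *
   * <pre class="prettyprint">
   * PhraseQuery pq = new PhraseQuery(1, "body", "quick", "fox");
   * </pre>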
*/
public int getSlop() {
return slop;
}
/** Returns the field this query applies to */
public String getField() {
return field;
}
/** Returns the list of terms in this phrase. */
public Term[] getTerms() {
return terms;
}
/** Returns the relative positions of terms in this phrase. */
public int[] getPositions() {
return positions;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
if (terms.length == 0) {
return new MatchNoDocsQuery("empty PhraseQuery");
} else if (terms.length == 1) {
return new TermQuery(terms[0]);
} else if (positions[0] != 0) {
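      // strip leading holes: shift all positions so that the first term is at position 0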
int[] newPositions = new int[positions.length];
for (int i = 0; i < positions.length; ++i) {
newPositions[i] = positions[i] - positions[0];
}
return new PhraseQuery(slop, terms, newPositions);
} else {
return super.rewrite(reader);
}
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field) == false) {
return;
}
QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
v.consumeTerms(this, terms);
}
/**
* Term postings and position information for phrase matching
*
* @lucene.internal
*/
public static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
final PostingsEnum postings;
final ImpactsEnum impacts;
final int position;
final Term[] terms;
final int nTerms; // for faster comparisons
/** Creates PostingsAndFreq instance */
public PostingsAndFreq(
PostingsEnum postings, ImpactsEnum impacts, int position, Term... terms) {
this.postings = postings;
this.impacts = impacts;
this.position = position;
nTerms = terms == null ? 0 : terms.length;
if (nTerms > 0) {
if (terms.length == 1) {
this.terms = terms;
} else {
Term[] terms2 = new Term[terms.length];
System.arraycopy(terms, 0, terms2, 0, terms.length);
Arrays.sort(terms2);
this.terms = terms2;
}
} else {
this.terms = null;
}
}
public PostingsAndFreq(
PostingsEnum postings, ImpactsEnum impacts, int position, List<Term> terms) {
this.postings = postings;
this.impacts = impacts;
this.position = position;
nTerms = terms == null ? 0 : terms.size();
if (nTerms > 0) {
Term[] terms2 = terms.toArray(new Term[0]);
if (nTerms > 1) {
Arrays.sort(terms2);
}
this.terms = terms2;
} else {
this.terms = null;
}
}
@Override
public int compareTo(PostingsAndFreq other) {
if (position != other.position) {
return position - other.position;
}
if (nTerms != other.nTerms) {
return nTerms - other.nTerms;
}
if (nTerms == 0) {
return 0;
}
for (int i = 0; i < terms.length; i++) {
int res = terms[i].compareTo(other.terms[i]);
if (res != 0) return res;
}
return 0;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + position;
for (int i = 0; i < nTerms; i++) {
result = prime * result + terms[i].hashCode();
}
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
PostingsAndFreq other = (PostingsAndFreq) obj;
if (position != other.position) return false;
if (terms == null) return other.terms == null;
return Arrays.equals(terms, other.terms);
}
}
/**
* A guess of the average number of simple operations for the initial seek and buffer refill per
* document for the positions of a term. See also {@link
* Lucene90PostingsReader.BlockImpactsPostingsEnum#nextPosition()}.
*
* <p>Aside: Instead of being constant this could depend among others on {@link
* Lucene90PostingsFormat#BLOCK_SIZE}, {@link TermsEnum#docFreq()}, {@link
* TermsEnum#totalTermFreq()}, {@link DocIdSetIterator#cost()} (expected number of matching docs),
* {@link LeafReader#maxDoc()} (total number of docs in the segment), and the seek time and block
* size of the device storing the index.
*/
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
/**
* Number of simple operations in {@link
* Lucene90PostingsReader.BlockImpactsPostingsEnum#nextPosition()} when no seek or buffer refill
* is done.
*/
private static final int TERM_OPS_PER_POS = 7;
/**
* Returns an expected cost in simple operations of processing the occurrences of a term in a
* document that contains the term. This is for use by {@link TwoPhaseIterator#matchCost}
* implementations.
*
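   * <p>As a rough illustration with hypothetical numbers: for a term with {@code docFreq == 100}
   * and {@code totalTermFreq == 300}, the expected number of occurrences per matching document is
   * {@code 300 / 100 = 3}, giving an estimated cost of {@code 128 + 3 * 7 = 149} simple
   * operations.
   *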
   * @param termsEnum a {@link TermsEnum} positioned at the term whose positions will be processed
*/
public static float termPositionsCost(TermsEnum termsEnum) throws IOException {
int docFreq = termsEnum.docFreq();
assert docFreq > 0;
long totalTermFreq = termsEnum.totalTermFreq();
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new PhraseWeight(this, field, searcher, scoreMode) {
private transient TermStates[] states;
@Override
protected Similarity.SimScorer getStats(IndexSearcher searcher) throws IOException {
final int[] positions = PhraseQuery.this.getPositions();
if (positions.length < 2) {
throw new IllegalStateException(
"PhraseWeight does not support less than 2 terms, call rewrite first");
} else if (positions[0] != 0) {
throw new IllegalStateException(
"PhraseWeight requires that the first position is 0, call rewrite first");
}
final IndexReaderContext context = searcher.getTopReaderContext();
states = new TermStates[terms.length];
TermStatistics[] termStats = new TermStatistics[terms.length];
int termUpTo = 0;
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
states[i] = TermStates.build(context, term, scoreMode.needsScores());
if (scoreMode.needsScores()) {
TermStates ts = states[i];
if (ts.docFreq() > 0) {
termStats[termUpTo++] =
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq());
}
}
}
if (termUpTo > 0) {
return similarity.scorer(
boost,
searcher.collectionStatistics(field),
ArrayUtil.copyOfSubArray(termStats, 0, termUpTo));
} else {
return null; // no terms at all, we won't use similarity
}
}
@Override
protected PhraseMatcher getPhraseMatcher(
LeafReaderContext context, SimScorer scorer, boolean exposeOffsets) throws IOException {
assert terms.length > 0;
final LeafReader reader = context.reader();
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.length];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
if (fieldTerms.hasPositions() == false) {
throw new IllegalStateException(
"field \""
+ field
+ "\" was indexed without position data; cannot run PhraseQuery (phrase="
+ getQuery()
+ ")");
}
// Reuse single TermsEnum below:
final TermsEnum te = fieldTerms.iterator();
float totalMatchCost = 0;
for (int i = 0; i < terms.length; i++) {
final Term t = terms[i];
final TermState state = states[i].get(context);
if (state == null) {
            /* term doesn't exist in this segment */
assert termNotInReader(reader, t) : "no termstate found but term exists in reader";
return null;
}
te.seekExact(t.bytes(), state);
PostingsEnum postingsEnum;
ImpactsEnum impactsEnum;
if (scoreMode == ScoreMode.TOP_SCORES) {
postingsEnum =
impactsEnum =
te.impacts(exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS);
} else {
postingsEnum =
te.postings(null, exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS);
impactsEnum = new SlowImpactsEnum(postingsEnum);
}
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, impactsEnum, positions[i], t);
totalMatchCost += termPositionsCost(te);
}
        // sort by position, then term (see PostingsAndFreq#compareTo)
if (slop == 0) {
ArrayUtil.timSort(postingsFreqs);
return new ExactPhraseMatcher(postingsFreqs, scoreMode, scorer, totalMatchCost);
} else {
return new SloppyPhraseMatcher(
postingsFreqs, slop, scoreMode, scorer, totalMatchCost, exposeOffsets);
}
}
};
}
// only called from assert
private static boolean termNotInReader(LeafReader reader, Term term) throws IOException {
return reader.docFreq(term) == 0;
}
  /** Returns a user-readable version of this query. */
@Override
public String toString(String f) {
StringBuilder buffer = new StringBuilder();
if (field != null && !field.equals(f)) {
buffer.append(field);
buffer.append(":");
}
buffer.append("\"");
final int maxPosition;
if (positions.length == 0) {
maxPosition = -1;
} else {
maxPosition = positions[positions.length - 1];
}
String[] pieces = new String[maxPosition + 1];
for (int i = 0; i < terms.length; i++) {
int pos = positions[i];
String s = pieces[pos];
if (s == null) {
s = (terms[i]).text();
} else {
s = s + "|" + (terms[i]).text();
}
pieces[pos] = s;
}
for (int i = 0; i < pieces.length; i++) {
if (i > 0) {
buffer.append(' ');
}
String s = pieces[i];
if (s == null) {
buffer.append('?');
} else {
buffer.append(s);
}
}
buffer.append("\"");
if (slop != 0) {
buffer.append("~");
buffer.append(slop);
}
return buffer.toString();
}
  /** Returns true iff <code>other</code> is equal to this. */
@Override
public boolean equals(Object other) {
return sameClassAs(other) && equalsTo(getClass().cast(other));
}
private boolean equalsTo(PhraseQuery other) {
return slop == other.slop
&& Arrays.equals(terms, other.terms)
&& Arrays.equals(positions, other.positions);
}
/** Returns a hash code value for this object. */
@Override
public int hashCode() {
int h = classHash();
h = 31 * h + slop;
h = 31 * h + Arrays.hashCode(terms);
h = 31 * h + Arrays.hashCode(positions);
return h;
}
}