/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
/**
 * Helps the {@link FieldOffsetStrategy} with position-sensitive queries (e.g. highlighting phrases correctly).
 * This is a stateful class holding information about the query, but it can be (and is) re-used across the
 * documents being highlighted. Despite this state, it's immutable after construction.
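 * <p>
 * Typical lifecycle: construct once for a query (construction performs the extraction), then call
 * {@link #createOffsetsEnumsForSpans} for each document to be highlighted.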
*
* @lucene.internal
*/
// TODO rename to SpanHighlighting ?
public class PhraseHelper {
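  /** A sentinel for when there is no position-sensitive handling to do; it extracts nothing, so {@link #hasPositionSensitivity()} returns false. */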
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
(s) -> false, spanQuery -> null, query -> null, true);
private final String fieldName;
private final Set<BytesRef> positionInsensitiveTerms; // (TermQuery terms)
private final Set<SpanQuery> spanQueries;
private final boolean willRewrite;
private final Predicate<String> fieldMatcher;
/**
* Constructor.
* {@code rewriteQueryPred} is an extension hook to override the default choice of
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
 * Similarly, {@code preExtractRewriteFunction} is an extension hook that lets a query be replaced with
 * different queries before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
 * {@code fieldMatcher} is the field name predicate used to extract the query parts that must be highlighted.
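 *
 * <p>A hypothetical construction with pass-through hooks might look like this (the field name and
 * lambdas are illustrative, not prescriptive):
 * <pre>{@code
 * PhraseHelper phraseHelper = new PhraseHelper(query, "body",
 *     fieldName -> fieldName.equals("body"), // fieldMatcher
 *     spanQuery -> null, // rewriteQueryPred: defer to mustRewriteQuery's default
 *     anyQuery -> null,  // preExtractRewriteFunction: no pre-extract substitution
 *     false);            // don't ignore queries needing rewrite
 * }</pre>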
*/
public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
Function<Query, Collection<Query>> preExtractRewriteFunction,
boolean ignoreQueriesNeedingRewrite) {
this.fieldName = field;
this.fieldMatcher = fieldMatcher;
// filter terms to those we want
positionInsensitiveTerms = new HashSet<>();
spanQueries = new HashSet<>();
// TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
boolean[] mustRewriteHolder = {false}; // boolean wrapped in 1-ary array so it's mutable from inner class
// For TermQueries or other position insensitive queries, collect the Terms.
// For other Query types, WSTE will convert to an equivalent SpanQuery. NOT extracting position spans here.
new WeightedSpanTermExtractor(field) {
//anonymous constructor
{
setExpandMultiTermQuery(true); //necessary for mustRewriteQuery(spanQuery) to work.
try {
extract(query, 1f, null); // null because we won't actually extract right now; we're not collecting
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
if (newQueriesToExtract != null) {
for (Query newQuery : newQueriesToExtract) {
extract(newQuery, boost, terms);
}
} else {
super.extract(query, boost, terms);
}
}
@Override
protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
if (clazz.isAssignableFrom(MultiTermQuery.class)) {
return true; //We do MTQ processing separately in MultiTermHighlighting.java
}
return true; //TODO set to false and provide a hook to customize certain queries.
}
// called on Query types that are NOT position sensitive, e.g. TermQuery
@Override
protected void extractWeightedTerms(Map<String, WeightedSpanTerm> terms, Query query, float boost) {
query.visit(new QueryVisitor() {
@Override
public boolean acceptField(String field) {
return fieldMatcher.test(field);
}
@Override
public void consumeTerms(Query query, Term... terms) {
for (Term term : terms) {
positionInsensitiveTerms.add(term.bytes());
}
}
});
}
// called on SpanQueries. Some other position-sensitive queries like PhraseQuery are converted beforehand
@Override
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery,
float boost) throws IOException {
// if this span query isn't for this field, skip it.
Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
collectSpanQueryFields(spanQuery, fieldNameSet);
for (String spanField : fieldNameSet) {
if (!fieldMatcher.test(spanField)) {
return;
}
}
boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
if (ignoreQueriesNeedingRewrite && mustRewriteQuery) {
return;// ignore this query
}
mustRewriteHolder[0] |= mustRewriteQuery;
spanQueries.add(spanQuery);
}
@Override
protected boolean mustRewriteQuery(SpanQuery spanQuery) {
        Boolean rewriteQ = rewriteQueryPred.apply(spanQuery); // allows the predicate to override the default
return rewriteQ != null ? rewriteQ : super.mustRewriteQuery(spanQuery);
}
}; // calling the constructor triggered the extraction/visiting we want. Hacky; yes.
willRewrite = mustRewriteHolder[0];
}
public Set<SpanQuery> getSpanQueries() {
return spanQueries;
}
/**
   * If there is no position sensitivity then this instance can be ignored.
*/
public boolean hasPositionSensitivity() {
return spanQueries.isEmpty() == false;
}
/**
   * Rewrite is needed to handle a {@link SpanMultiTermQueryWrapper} (MTQ / wildcards) or some
   * custom queries. When true, the resulting term list will probably differ from what it was initially
   * known to be.
*/
public boolean willRewrite() {
return willRewrite;
}
/** Returns the terms that are position-insensitive (sorted). */
public BytesRef[] getAllPositionInsensitiveTerms() {
BytesRef[] result = positionInsensitiveTerms.toArray(new BytesRef[positionInsensitiveTerms.size()]);
Arrays.sort(result);
return result;
}
  /** Given the internal SpanQueries, produce OffsetsEnums for {@code docId}, adding them to the {@code results} param. */
public void createOffsetsEnumsForSpans(LeafReader leafReader, int docId, List<OffsetsEnum> results) throws IOException {
leafReader = new SingleFieldWithOffsetsFilterLeafReader(leafReader, fieldName);
//TODO avoid searcher and do what it does to rewrite & get weight?
IndexSearcher searcher = new IndexSearcher(leafReader);
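    // no query caching; this searcher wraps a throwaway reader used for a single document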
searcher.setQueryCache(null);
    // for each SpanQuery, grab its Spans and put it into a PriorityQueue
PriorityQueue<Spans> spansPriorityQueue = new PriorityQueue<Spans>(spanQueries.size()) {
@Override
protected boolean lessThan(Spans a, Spans b) {
return a.startPosition() <= b.startPosition();
}
};
for (Query query : spanQueries) {
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
Scorer scorer = weight.scorer(leafReader.getContext());
if (scorer == null) {
continue;
}
TwoPhaseIterator twoPhaseIterator = scorer.twoPhaseIterator();
if (twoPhaseIterator != null) {
if (twoPhaseIterator.approximation().advance(docId) != docId || !twoPhaseIterator.matches()) {
continue;
}
      } else if (scorer.iterator().advance(docId) != docId) { // pre-position the iterator; skip this query if the doc doesn't match
continue;
}
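      // every query in spanQueries is a SpanQuery, so the scorer here is a SpanScorer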
Spans spans = ((SpanScorer) scorer).getSpans();
assert spans.docID() == docId;
if (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
spansPriorityQueue.add(spans);
}
}
// Iterate the Spans in the PriorityQueue, collecting as we go. By using a PriorityQueue ordered by position,
// the underlying offsets in our collector will be mostly appended to the end of arrays (efficient).
    // note: alternatively it'd be interesting if we produced one OffsetsEnum that internally advanced
// this PriorityQueue when nextPosition is called; it would cap what we have to cache for large docs and
// exiting early (due to maxLen) is easy.
// But at least we have an accurate "freq" and it shouldn't be too much data to collect. Even SpanScorer
// navigates the spans fully to compute a good freq (and thus score)!
OffsetSpanCollector spanCollector = new OffsetSpanCollector();
while (spansPriorityQueue.size() > 0) {
Spans spans = spansPriorityQueue.top();
//TODO limit to a capped endOffset length somehow so we can break this loop early
spans.collect(spanCollector);
if (spans.nextStartPosition() == Spans.NO_MORE_POSITIONS) {
spansPriorityQueue.pop();
} else {
spansPriorityQueue.updateTop();
}
}
results.addAll(spanCollector.termToOffsetsEnums.values());
}
/**
   * Needed to support the ability to highlight a query irrespective of the field the query refers to
   * (aka requireFieldMatch=false).
* This reader will just delegate every call to a single field in the wrapped
* LeafReader. This way we ensure that all queries going through this reader target the same field.
*/
private static final class SingleFieldWithOffsetsFilterLeafReader extends FilterLeafReader {
final String fieldName;
SingleFieldWithOffsetsFilterLeafReader(LeafReader in, String fieldName) {
super(in);
this.fieldName = fieldName;
}
@Override
public FieldInfos getFieldInfos() {
throw new UnsupportedOperationException();//TODO merge them
}
@Override
public Terms terms(String field) throws IOException {
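      // note: the requested field is intentionally ignored; we always delegate to the single fieldName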
// ensure the underlying PostingsEnum returns offsets. It's sad we have to do this to use the SpanCollector.
return new FilterTerms(super.terms(fieldName)) {
@Override
public TermsEnum iterator() throws IOException {
return new FilterTermsEnum(in.iterator()) {
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return super.postings(reuse, flags | PostingsEnum.OFFSETS);
}
};
}
};
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
return super.getNormValues(fieldName);
}
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
}
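  /** Collects the offsets of span matches, accumulating one {@link SpanCollectedOffsetsEnum} per collected term. */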
private class OffsetSpanCollector implements SpanCollector {
Map<BytesRef, SpanCollectedOffsetsEnum> termToOffsetsEnums = new HashMap<>();
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
if (!fieldMatcher.test(term.field())) {
return;
}
SpanCollectedOffsetsEnum offsetsEnum = termToOffsetsEnums.get(term.bytes());
if (offsetsEnum == null) {
        // If it's position-insensitive we handle it outside of PhraseHelper. term.field() is from the Query.
if (positionInsensitiveTerms.contains(term.bytes())) {
return;
}
offsetsEnum = new SpanCollectedOffsetsEnum(term.bytes(), postings.freq());
termToOffsetsEnums.put(term.bytes(), offsetsEnum);
}
offsetsEnum.add(postings.startOffset(), postings.endOffset());
}
@Override
public void reset() { // called when at a new position. We don't care.
}
}
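  /** An {@link OffsetsEnum} over a pre-collected, sorted, de-duplicated array of offset pairs for one term. */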
private static class SpanCollectedOffsetsEnum extends OffsetsEnum {
// TODO perhaps optionally collect (and expose) payloads?
private final BytesRef term;
private final int[] startOffsets;
private final int[] endOffsets;
private int numPairs = 0;
    private int enumIdx = -1; // -1 means nextPosition() hasn't been called yet
private SpanCollectedOffsetsEnum(BytesRef term, int postingsFreq) {
this.term = term;
this.startOffsets = new int[postingsFreq]; // hopefully not wasteful? At least we needn't resize it.
this.endOffsets = new int[postingsFreq];
}
    // called by the collector before this enum is navigated
void add(int startOffset, int endOffset) {
assert enumIdx == -1 : "bad state";
// loop backwards since we expect a match at the end or close to it. We expect O(1) not O(N).
int pairIdx = numPairs - 1;
for (; pairIdx >= 0; pairIdx--) {
int iStartOffset = startOffsets[pairIdx];
int iEndOffset = endOffsets[pairIdx];
int cmp = Integer.compare(iStartOffset, startOffset);
if (cmp == 0) {
cmp = Integer.compare(iEndOffset, endOffset);
}
if (cmp == 0) {
return; // we already have this offset-pair for this term
} else if (cmp < 0) {
break; //we will insert offsetPair to the right of pairIdx
}
}
// pairIdx is now one position to the left of where we insert the new pair
// shift right any pairs by one to make room
final int shiftLen = numPairs - (pairIdx + 1);
if (shiftLen > 0) {
System.arraycopy(startOffsets, pairIdx + 1, startOffsets, pairIdx + 2, shiftLen);
System.arraycopy(endOffsets, pairIdx + 1, endOffsets, pairIdx + 2, shiftLen);
}
// now we can place the offset pair
startOffsets[pairIdx + 1] = startOffset;
endOffsets[pairIdx + 1] = endOffset;
numPairs++;
}
@Override
public boolean nextPosition() throws IOException {
return ++enumIdx < numPairs;
}
@Override
public int freq() throws IOException {
return numPairs;
}
@Override
public BytesRef getTerm() throws IOException {
return term;
}
@Override
public int startOffset() throws IOException {
return startOffsets[enumIdx];
}
@Override
public int endOffset() throws IOException {
return endOffsets[enumIdx];
}
}
}