src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs - lucenenet - Git at Google

 #if FEATURE_BREAKITERATOR
 using J2N.Text;
 using ICU4N.Text;
 using Lucene.Net.Analysis;
 using Lucene.Net.Index;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
 using Lucene.Net.Util.Automaton;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Globalization;
 using System.IO;
 using System.Linq;
 using System.Text;
 using JCG = J2N.Collections.Generic;

 namespace Lucene.Net.Search.PostingsHighlight
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// Simple highlighter that does not analyze fields nor use
     /// term vectors. Instead it requires
     /// <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>.
     /// </summary>
     /// <remarks>
     /// PostingsHighlighter treats the single original document as the whole corpus, and then scores individual
     /// passages as if they were documents in this corpus. It uses a <see cref="BreakIterator"/> to find
     /// passages in the text; by default it breaks using <see cref="BreakIterator.GetSentenceInstance(CultureInfo)"/> (for sentence breaking).
     /// It then iterates in parallel (merge sorting by offset) through
     /// the positions of all terms from the query, coalescing those hits that occur in a single passage
     /// into a <see cref="Passage"/>, and then scores each Passage using a separate <see cref="PassageScorer"/>.
     /// Passages are finally formatted into highlighted snippets with a <see cref="PassageFormatter"/>.
     /// <para/>
     /// You can customize the behavior by subclassing this highlighter, some important hooks:
     /// <list type="bullet">
     ///     <item><description><see cref="GetBreakIterator(string)"/>: Customize how the text is divided into passages.</description></item>
     ///     <item><description><see cref="GetScorer(string)"/>: Customize how passages are ranked.</description></item>
     ///     <item><description><see cref="GetFormatter(string)"/>: Customize how snippets are formatted.</description></item>
     ///     <item><description><see cref="GetIndexAnalyzer(string)"/>: Enable highlighting of MultiTermQuerys such as <see cref="WildcardQuery"/>.</description></item>
     /// </list>
     /// <para/>
     /// <b>WARNING</b>: The code is very new and probably still has some exciting bugs!
     /// <para/>
     /// Example usage:
     /// <code>
     ///     // configure field with offsets at index time
     ///     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
     ///     offsetsType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
     ///     Field body = new Field("body", "foobar", offsetsType);
     ///
     ///     // retrieve highlights at query time
     ///     ICUPostingsHighlighter highlighter = new ICUPostingsHighlighter();
     ///     Query query = new TermQuery(new Term("body", "highlighting"));
     ///     TopDocs topDocs = searcher.Search(query, n);
     ///     string[] highlights = highlighter.Highlight("body", query, searcher, topDocs);
     /// </code>
     /// <para/>
     /// This is thread-safe, and can be used across different readers.
     /// <para/>
     /// Note that the .NET implementation differs from the <c>PostingsHighlighter</c> in Lucene in
     /// that it is backed by an ICU <see cref="RuleBasedBreakIterator"/>, which differs slightly in default behavior
     /// than the one in the JDK. However, the ICU <see cref="RuleBasedBreakIterator"/> behavior can be customized
     /// to meet a lot of scenarios that the one in the JDK cannot. See the ICU documentation at
     /// <a href="http://userguide.icu-project.org/boundaryanalysis/break-rules">http://userguide.icu-project.org/boundaryanalysis/break-rules</a>
     /// for more information how to pass custom rules to an ICU <see cref="RuleBasedBreakIterator"/>.
     /// <para/>
     /// @lucene.experimental
     /// </remarks>
     [ExceptionToClassNameConvention]
     public class ICUPostingsHighlighter // LUCENENET specific - renamed ICUPostingsHighlighter to reflect the change in default behavior
     {
         // TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
         // but if the analyzer is really fast and the field is tiny, this might really be
         // unnecessary.

         /// <summary>
         /// For rewriting: we don't want slow processing from MTQs.
         /// This is a <see cref="MultiReader"/> constructed without any sub-readers.
         /// </summary>
         private static readonly IndexReader EMPTY_INDEXREADER = new MultiReader();

         /// <summary>
         /// Default maximum content size to process. Typically snippets
         /// closer to the beginning of the document better summarize its content.
         /// Used by the parameterless constructor.
         /// </summary>
         public static readonly int DEFAULT_MAX_LENGTH = 10000;

         /// <summary>
         /// Maximum content size to process, fixed at construction time and
         /// passed to <see cref="LoadFieldValues(IndexSearcher, string[], int[], int)"/>.
         /// </summary>
         private readonly int maxLength;

         /// <summary>
         /// Set the first time <see cref="GetFormatter(string)"/> is called,
         /// and then reused.
         /// </summary>
         // NOTE(review): the lazy assignment in GetFormatter is not guarded by a lock.
         private PassageFormatter defaultFormatter;

         /// <summary>
         /// Set the first time <see cref="GetScorer(string)"/> is called, and then reused.
         /// </summary>
         // NOTE(review): the lazy assignment in GetScorer is not guarded by a lock.
         private PassageScorer defaultScorer;

         /// <summary>
         /// Creates a new highlighter whose maximum content size is <see cref="DEFAULT_MAX_LENGTH"/>.
         /// </summary>
         public ICUPostingsHighlighter() : this(DEFAULT_MAX_LENGTH) { }

         /// <summary>
         /// Creates a new highlighter, specifying maximum content length.
         /// </summary>
         /// <param name="maxLength">maximum content size to process; must be in the range <c>[0, int.MaxValue)</c>.</param>
         /// <exception cref="ArgumentException">if <paramref name="maxLength"/> is negative or <c>int.MaxValue</c></exception>
         public ICUPostingsHighlighter(int maxLength)
         {
             // int.MaxValue is excluded for two reasons: no overflow problems in
             // BreakIterator.preceding(offset+1), and our sentinel in the offsets
             // queue uses this value to terminate.
             if (maxLength < 0 || maxLength == int.MaxValue)
             {
                 // The message states both bounds the guard enforces and names the parameter.
                 throw new ArgumentException("maxLength must be >= 0 and < System.Int32.MaxValue", nameof(maxLength));
             }
             this.maxLength = maxLength;
         }

         /// <summary>
         /// Supplies the <see cref="BreakIterator"/> that divides the text of
         /// <paramref name="field"/> into passages. The default is
         /// <see cref="BreakIterator.GetSentenceInstance(CultureInfo)"/> for the invariant culture;
         /// subclasses can override to customize.
         /// </summary>
         /// <param name="field">field name; not used by the default implementation.</param>
         /// <returns>the break iterator to use for passage boundaries.</returns>
         protected virtual BreakIterator GetBreakIterator(string field)
             => BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);

         /// <summary>
         /// Supplies the <see cref="PassageFormatter"/> that turns passages into
         /// highlighted snippets. The default implementation creates a
         /// <see cref="DefaultPassageFormatter"/> on the first call and returns that
         /// same instance afterwards; subclasses can override to customize.
         /// </summary>
         /// <param name="field">field name; not used by the default implementation.</param>
         /// <returns>the formatter to use for <paramref name="field"/>.</returns>
         protected virtual PassageFormatter GetFormatter(string field)
         {
             // Create the shared default lazily; later calls reuse it.
             return defaultFormatter ?? (defaultFormatter = new DefaultPassageFormatter());
         }

         /// <summary>
         /// Supplies the <see cref="PassageScorer"/> used to rank passages.
         /// The default implementation creates a <see cref="PassageScorer"/> on the
         /// first call and returns that same instance afterwards;
         /// subclasses can override to customize.
         /// </summary>
         /// <param name="field">field name; not used by the default implementation.</param>
         /// <returns>the scorer to use for <paramref name="field"/>.</returns>
         protected virtual PassageScorer GetScorer(string field)
         {
             // Create the shared default lazily; later calls reuse it.
             return defaultScorer ?? (defaultScorer = new PassageScorer());
         }

         /// <summary>
         /// Highlights the single top passage from one field for each document.
         /// Delegates to the five-argument overload with a passage limit of 1.
         /// </summary>
         /// <param name="field">field name to highlight. Must have a stored string value and also be indexed with offsets.</param>
         /// <param name="query">query to highlight.</param>
         /// <param name="searcher">searcher that was previously used to execute the query.</param>
         /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param>
         /// <returns>
         /// Array of formatted snippets corresponding to the documents in <paramref name="topDocs"/>.
         /// If no highlights were found for a document, the
         /// first sentence for the field will be returned.
         /// </returns>
         /// <exception cref="IOException">if an I/O error occurred during processing</exception>
         /// <exception cref="ArgumentException">if <paramref name="field"/> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
         public virtual string[] Highlight(string field, Query query, IndexSearcher searcher, TopDocs topDocs)
             => Highlight(field, query, searcher, topDocs, 1);

         /// <summary>
         /// Highlights the top-N passages from one field for each document.
         /// Wraps the field and passage limit in one-element arrays, calls
         /// <c>HighlightFields</c>, and returns the entry stored under <paramref name="field"/>.
         /// </summary>
         /// <param name="field">
         /// field name to highlight.
         /// Must have a stored string value and also be indexed with offsets.
         /// </param>
         /// <param name="query">query to highlight.</param>
         /// <param name="searcher">searcher that was previously used to execute the query.</param>
         /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param>
         /// <param name="maxPassages">The maximum number of top-N ranked passages used to form the highlighted snippets.</param>
         /// <returns>
         /// Array of formatted snippets corresponding to the documents in <paramref name="topDocs"/>,
         /// or <c>null</c> when the result map holds no entry for <paramref name="field"/>.
         /// If no highlights were found for a document, the
         /// first <paramref name="maxPassages"/> sentences from the
         /// field will be returned.
         /// </returns>
         /// <exception cref="IOException">if an I/O error occurred during processing</exception>
         /// <exception cref="ArgumentException">Illegal if <paramref name="field"/> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
         public virtual string[] Highlight(string field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages)
         {
             string[] singleField = { field };
             int[] passageLimits = { maxPassages };
             IDictionary<string, string[]> snippetsByField = HighlightFields(singleField, query, searcher, topDocs, passageLimits);

             // A missing entry yields null, exactly as an unassigned out value would.
             return snippetsByField.TryGetValue(field, out string[] snippets) ? snippets : null;
         }

         /// <summary>
         /// Highlights the single top passage from each of several fields.
         /// <para/>
         /// Conceptually, this behaves as a more efficient form of:
         /// <code>
         /// IDictionary&lt;string, string[]&gt; m = new Dictionary&lt;string, string[]&gt;();
         /// foreach (string field in fields)
         /// {
         ///     m[field] = Highlight(field, query, searcher, topDocs);
         /// }
         /// return m;
         /// </code>
         /// </summary>
         /// <param name="fields">field names to highlight. Must have a stored string value and also be indexed with offsets.</param>
         /// <param name="query">query to highlight.</param>
         /// <param name="searcher">searcher that was previously used to execute the query.</param>
         /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param>
         /// <returns>
         /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets
         /// corresponding to the documents in <paramref name="topDocs"/>.
         /// If no highlights were found for a document, the
         /// first sentence from the field will be returned.
         /// </returns>
         /// <exception cref="IOException">if an I/O error occurred during processing</exception>
         /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
         public virtual IDictionary<string, string[]> HighlightFields(string[] fields, Query query, IndexSearcher searcher, TopDocs topDocs)
         {
             // One passage per field: build a limits array parallel to 'fields', all ones.
             int[] onePassageEach = new int[fields.Length];
             for (int k = 0; k < onePassageEach.Length; k++)
             {
                 onePassageEach[k] = 1;
             }
             return HighlightFields(fields, query, searcher, topDocs, onePassageEach);
         }

         /// <summary>
         /// Highlights the top-N passages from several fields, taking the documents
         /// to highlight from <paramref name="topDocs"/>.
         /// <para/>
         /// Conceptually, this behaves as a more efficient form of:
         /// <code>
         /// IDictionary&lt;string, string[]&gt; m = new Dictionary&lt;string, string[]&gt;();
         /// foreach (string field in fields)
         /// {
         ///     m[field] = Highlight(field, query, searcher, topDocs, maxPassages);
         /// }
         /// return m;
         /// </code>
         /// </summary>
         /// <param name="fields">field names to highlight. Must have a stored string value and also be indexed with offsets.</param>
         /// <param name="query">query to highlight.</param>
         /// <param name="searcher">searcher that was previously used to execute the query.</param>
         /// <param name="topDocs">TopDocs containing the summary result documents to highlight.</param>
         /// <param name="maxPassages">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param>
         /// <returns>
         /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets
         /// corresponding to the documents in <paramref name="topDocs"/>.
         /// If no highlights were found for a document, the
         /// first <paramref name="maxPassages"/> sentences from the
         /// field will be returned.
         /// </returns>
         /// <exception cref="IOException">if an I/O error occurred during processing</exception>
         /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
         public virtual IDictionary<string, string[]> HighlightFields(string[] fields, Query query, IndexSearcher searcher, TopDocs topDocs, int[] maxPassages)
         {
             // Flatten the hits into a plain docid array, preserving hit order.
             ScoreDoc[] hits = topDocs.ScoreDocs;
             int[] docids = new int[hits.Length];
             int next = 0;
             foreach (ScoreDoc hit in hits)
             {
                 docids[next++] = hit.Doc;
             }

             return HighlightFields(fields, query, searcher, docids, maxPassages);
         }

         /// <summary>
         /// Highlights the top-N passages from several fields for an explicit
         /// list of document IDs. Each object produced by
         /// <c>HighlightFieldsAsObjects</c> is converted with <see cref="object.ToString()"/>;
         /// <c>null</c> entries stay <c>null</c>.
         /// </summary>
         /// <param name="fieldsIn">field names to highlight. Must have a stored string value and also be indexed with offsets.</param>
         /// <param name="query">query to highlight.</param>
         /// <param name="searcher">searcher that was previously used to execute the query.</param>
         /// <param name="docidsIn">containing the document IDs to highlight.</param>
         /// <param name="maxPassagesIn">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param>
         /// <returns>
         /// <see cref="T:IDictionary{string, string[]}"/> keyed on field name, containing the array of formatted snippets
         /// corresponding to the documents in <paramref name="docidsIn"/>.
         /// If no highlights were found for a document, the
         /// first <c>maxPassages</c> from the field will
         /// be returned.
         /// </returns>
         /// <exception cref="IOException">if an I/O error occurred during processing</exception>
         /// <exception cref="ArgumentException">if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/></exception>
         public virtual IDictionary<string, string[]> HighlightFields(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn)
         {
             var snippets = new Dictionary<string, string[]>();
             foreach (KeyValuePair<string, object[]> entry in HighlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn))
             {
                 object[] formatted = entry.Value;
                 string[] rendered = new string[formatted.Length];
                 for (int k = 0; k < formatted.Length; k++)
                 {
                     // A null snippet object leaves the slot null.
                     rendered[k] = formatted[k]?.ToString();
                 }
                 snippets[entry.Key] = rendered;
             }

             return snippets;
         }

         /// <summary>
         /// Sorter over two parallel arrays: it orders the field names with
         /// <c>CompareToOrdinal</c> and moves each field's passage limit along with it,
         /// so that index <c>i</c> in both arrays keeps describing the same field.
         /// </summary>
         internal class InPlaceMergeSorterAnonymousHelper : InPlaceMergeSorter
         {
             private readonly string[] fields;
             private readonly int[] maxPassages;

             /// <summary>Captures the two arrays; both are reordered in place by sorting.</summary>
             public InPlaceMergeSorterAnonymousHelper(string[] fields, int[] maxPassages)
             {
                 this.fields = fields;
                 this.maxPassages = maxPassages;
             }

             /// <summary>Exchanges slots <paramref name="i"/> and <paramref name="j"/> in both arrays.</summary>
             protected override void Swap(int i, int j)
             {
                 // Field names first ...
                 string heldField = fields[i];
                 fields[i] = fields[j];
                 fields[j] = heldField;

                 // ... then the matching passage limits.
                 int heldLimit = maxPassages[i];
                 maxPassages[i] = maxPassages[j];
                 maxPassages[j] = heldLimit;
             }

             /// <summary>Compares the field names at the two slots; passage limits play no part in ordering.</summary>
             protected override int Compare(int i, int j)
                 => fields[i].CompareToOrdinal(fields[j]);
         }

         /// <summary>
         /// Expert: highlights the top-N passages from multiple fields,
         /// for the provided int[] docids, to custom object as
         /// returned by the <see cref="PassageFormatter"/>.  Use
         /// this API to render to something other than <see cref="string"/>.
         /// <para/>
         /// The input arrays are copied before sorting, so the caller's arrays are not
         /// reordered; each result array is indexed in the order of <paramref name="docidsIn"/>.
         /// </summary>
         /// <param name="fieldsIn">field names to highlight. Must have a stored string value and also be indexed with offsets.</param>
         /// <param name="query">query to highlight.</param>
         /// <param name="searcher">searcher that was previously used to execute the query.</param>
         /// <param name="docidsIn">containing the document IDs to highlight.</param>
         /// <param name="maxPassagesIn">The maximum number of top-N ranked passages per-field used to form the highlighted snippets.</param>
         /// <returns>
         /// <see cref="T:IDictionary{string, object[]}"/> keyed on field name, containing the array of formatted snippets
         /// corresponding to the documents in <paramref name="docidsIn"/>.
         /// If no highlights were found for a document, the
         /// first <paramref name="maxPassagesIn"/> from the field will
         /// be returned.
         /// </returns>
         /// <exception cref="IOException">if an I/O error occurred during processing</exception>
         /// <exception cref="ArgumentException">
         /// if <paramref name="fieldsIn"/> is empty, if <paramref name="fieldsIn"/> and <paramref name="maxPassagesIn"/>
         /// differ in length, or if <c>field</c> was indexed without <see cref="IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS"/>
         /// </exception>
         protected internal virtual IDictionary<string, object[]> HighlightFieldsAsObjects(string[] fieldsIn, Query query, IndexSearcher searcher, int[] docidsIn, int[] maxPassagesIn)
         {
             if (fieldsIn.Length < 1)
             {
                 throw new ArgumentException("fieldsIn must not be empty");
             }
             if (fieldsIn.Length != maxPassagesIn.Length)
             {
                 throw new ArgumentException("invalid number of maxPassagesIn");
             }

             IndexReader reader = searcher.IndexReader;

             // Gather the terms of the rewritten query into one sorted set.
             Query rewritten = Rewrite(query);
             var queryTerms = new JCG.SortedSet<Term>();
             rewritten.ExtractTerms(queryTerms);

             IList<AtomicReaderContext> leaves = reader.Context.Leaves;

             // Work on private copies because the arrays are sorted in place below.
             int[] docids = (int[])docidsIn.Clone();
             string[] fields = (string[])fieldsIn.Clone();
             int[] maxPassages = (int[])maxPassagesIn.Clone();

             // sort for sequential io
             ArrayUtil.TimSort(docids);
             new InPlaceMergeSorterAnonymousHelper(fields, maxPassages).Sort(0, fields.Length);

             // pull stored data:
             IList<string[]> contents = LoadFieldValues(searcher, fields, docids, maxLength);

             var highlights = new Dictionary<string, object[]>();
             for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
             {
                 string field = fields[fieldIndex];
                 int numPassages = maxPassages[fieldIndex];

                 // Select just this field's terms: everything from the empty term up to,
                 // but excluding, the field's BIG_TERM.
                 // LUCENENET: Call custom GetViewBetween overload to mimic Java's exclusive upper bound behavior.
                 Term lowest = new Term(field, "");
                 Term highest = new Term(field, UnicodeUtil.BIG_TERM);
                 var fieldTerms = queryTerms.GetViewBetween(lowest, lowerValueInclusive: true, highest, upperValueInclusive: false);

                 // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)

                 // Strip off the redundant field, keeping only the term bytes.
                 BytesRef[] terms = new BytesRef[fieldTerms.Count];
                 int next = 0;
                 foreach (Term fieldTerm in fieldTerms)
                 {
                     terms[next++] = fieldTerm.Bytes;
                 }

                 IDictionary<int, object> fieldHighlights = HighlightField(field, contents[fieldIndex], GetBreakIterator(field), terms, docids, leaves, numPassages, query);

                 // Map the results back to the caller's document order; missing docs stay null.
                 object[] perDocument = new object[docids.Length];
                 for (int j = 0; j < docidsIn.Length; j++)
                 {
                     fieldHighlights.TryGetValue(docidsIn[j], out object snippet);
                     perDocument[j] = snippet;
                 }
                 highlights[field] = perDocument;
             }
             return highlights;
         }

         /// <summary>
         /// Loads the string values for each field X docID to be
         /// highlighted.  By default this loads from stored
         /// fields, but a subclass can change the source.  This
         /// method should allocate the string[fields.length][docids.length]
         /// and fill all values.  The returned strings must be
         /// identical to what was indexed.
         /// </summary>
         /// <param name="searcher">searcher used to load each document.</param>
         /// <param name="fields">field names whose values are loaded.</param>
         /// <param name="docids">document IDs to load, in the order the result columns are filled.</param>
         /// <param name="maxLength">content size limit handed to the stored-field visitor.</param>
         /// <returns>one <c>string[]</c> per field (same order as <paramref name="fields"/>), each with one entry per docid.</returns>
         protected virtual IList<string[]> LoadFieldValues(IndexSearcher searcher, string[] fields, int[] docids, int maxLength)
         {
             int fieldCount = fields.Length;
             int docCount = docids.Length;
             string[][] contents = RectangularArrays.ReturnRectangularArray<string>(fieldCount, docCount);

             // Each field may declare its own separator between multiple values.
             char[] valueSeparators = new char[fieldCount];
             for (int f = 0; f < fieldCount; f++)
             {
                 valueSeparators[f] = GetMultiValuedSeparator(fields[f]);
             }

             // A single visitor is reused for every document and reset after each one.
             var visitor = new LimitedStoredFieldVisitor(fields, valueSeparators, maxLength);
             for (int d = 0; d < docCount; d++)
             {
                 searcher.Doc(docids[d], visitor);
                 for (int f = 0; f < fieldCount; f++)
                 {
                     contents[f][d] = visitor.GetValue(f); // LUCENENET: No point in doing ToString() on a string
                 }
                 visitor.Reset();
             }
             return contents;
         }

         /// <summary>
         /// Supplies the logical separator placed between the values of a multi-valued field.
         /// The default is a space character, which means passages can span across values;
         /// a subclass can override, for example with <c>U+2029 PARAGRAPH SEPARATOR (PS)</c>
         /// if each value holds a discrete passage for highlighting.
         /// </summary>
         /// <param name="field">field name; not used by the default implementation.</param>
         /// <returns>the separator character, a single space by default.</returns>
         protected virtual char GetMultiValuedSeparator(string field) => ' ';

         /// <summary>
         /// Supplies the analyzer originally used to index the content for <paramref name="field"/>.
         /// <para/>
         /// This is used to highlight some <see cref="MultiTermQuery"/>s.
         /// </summary>
         /// <param name="field">field name; not used by the default implementation.</param>
         /// <returns><see cref="Analyzer"/> or null (the default, meaning no special multi-term processing)</returns>
         protected virtual Analyzer GetIndexAnalyzer(string field) => null;

         /// <summary>
         /// Highlights one field across all requested documents.
         /// </summary>
         /// <param name="field">field name being highlighted.</param>
         /// <param name="contents">field content per document, parallel to <paramref name="docids"/>.</param>
         /// <param name="bi">break iterator used to split each content string into passages.</param>
         /// <param name="terms">query terms for this field, with the field name stripped off.</param>
         /// <param name="docids">document IDs to process; expected in increasing order (asserted per segment).</param>
         /// <param name="leaves">leaf reader contexts used to resolve each docid to its segment.</param>
         /// <param name="maxPassages">maximum number of passages per document.</param>
         /// <param name="query">original query, consulted for multi-term automata when an index analyzer is available.</param>
         /// <returns>
         /// Map from docid to the formatter's output. Documents with empty content, documents in
         /// segments without terms for the field, and documents yielding no passages have no entry.
         /// </returns>
         /// <exception cref="NullReferenceException">if <see cref="GetFormatter(string)"/> returns <c>null</c></exception>
         private IDictionary<int, object> HighlightField(string field, string[] contents, BreakIterator bi, BytesRef[] terms, int[] docids, IList<AtomicReaderContext> leaves, int maxPassages, Query query)
         {
             IDictionary<int, object> highlights = new Dictionary<int, object>();

             PassageFormatter fieldFormatter = GetFormatter(field);
             if (fieldFormatter == null)
             {
                 // NOTE(review): throwing NullReferenceException directly is discouraged in .NET;
                 // it is retained here so the exception type observed by callers does not change.
                 throw new NullReferenceException("PassageFormatter cannot be null");
             }

             // check if we should do any multiterm processing
             Analyzer analyzer = GetIndexAnalyzer(field);
             CharacterRunAutomaton[] automata = new CharacterRunAutomaton[0];
             if (analyzer != null)
             {
                 automata = MultiTermHighlighting.ExtractAutomata(query, field);
             }

             // resize 'terms', where the last term is the multiterm matcher
             if (automata.Length > 0)
             {
                 BytesRef[] newTerms = new BytesRef[terms.Length + 1];
                 System.Array.Copy(terms, 0, newTerms, 0, terms.Length);
                 terms = newTerms;
             }

             // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes
             // otherwise, we will just advance() existing enums to the new document in the same segment.
             DocsAndPositionsEnum[] postings = null;
             TermsEnum termsEnum = null;
             int lastLeaf = -1;

             for (int i = 0; i < docids.Length; i++)
             {
                 string content = contents[i];
                 if (content.Length == 0)
                 {
                     continue; // nothing to do
                 }
                 bi.SetText(content);
                 int doc = docids[i];
                 int leaf = ReaderUtil.SubIndex(doc, leaves);
                 AtomicReaderContext subContext = leaves[leaf];
                 AtomicReader r = subContext.AtomicReader;

                 Debug.Assert(leaf >= lastLeaf); // increasing order

                 // if the segment has changed, we must initialize new enums.
                 if (leaf != lastLeaf)
                 {
                     Terms t = r.GetTerms(field);
                     if (t != null)
                     {
                         termsEnum = t.GetIterator(null);
                         postings = new DocsAndPositionsEnum[terms.Length];
                     }
                     else
                     {
                         // This segment has no terms for the field. Drop the enums left over from
                         // the previous segment; otherwise they would be advanced with docids that
                         // are relative to this segment and yield highlights from the wrong document.
                         termsEnum = null;
                         postings = null;
                     }
                 }
                 if (termsEnum == null)
                 {
                     // lastLeaf is deliberately not updated here, so the next document
                     // looks the terms up again.
                     continue; // no terms for this field, nothing to do
                 }

                 // if there are multi-term matches, we have to initialize the "fake" enum for each document
                 if (automata.Length > 0)
                 {
                     DocsAndPositionsEnum dp = MultiTermHighlighting.GetDocsEnum(analyzer.GetTokenStream(field, content), automata);
                     dp.Advance(doc - subContext.DocBase);
                     postings[terms.Length - 1] = dp; // last term is the multiterm matcher
                 }

                 // HighlightDoc works with segment-relative docids, hence the DocBase subtraction.
                 Passage[] passages = HighlightDoc(field, terms, content.Length, bi, doc - subContext.DocBase, termsEnum, postings, maxPassages);

                 if (passages.Length == 0)
                 {
                     // no passages were returned, so ask for a default summary
                     passages = GetEmptyHighlight(field, bi, maxPassages);
                 }

                 if (passages.Length > 0)
                 {
                     highlights[doc] = fieldFormatter.Format(passages, content);
                 }

                 lastLeaf = leaf;
             }

             return highlights;
         }

         /// <summary>
         /// Orders passages by ascending <c>score</c>; when neither score is smaller
         /// than the other, falls back to ascending <c>startOffset</c>.
         /// </summary>
         internal class HighlightDocComparerAnonymousHelper1 : IComparer<Passage>
         {
             public int Compare(Passage left, Passage right)
             {
                 if (left.score < right.score)
                 {
                     return -1;
                 }
                 if (left.score > right.score)
                 {
                     return 1;
                 }
                 // Scores do not order the pair: decide by position in the text.
                 return left.startOffset - right.startOffset;
             }
         }

         /// <summary>
         /// Orders passages purely by ascending <c>startOffset</c>.
         /// </summary>
         internal class HighlightDocComparerAnonymousHelper2 : IComparer<Passage>
         {
             public int Compare(Passage left, Passage right)
                 => left.startOffset - right.startOffset;
         }

         // algorithm: treat sentence snippets as miniature documents
         // we can intersect these with the postings lists via BreakIterator.preceding(offset),
         // score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
         private Passage[] HighlightDoc(string field, BytesRef[] terms, int contentLength, BreakIterator bi, int doc,
             TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n)
         {
             PassageScorer scorer = GetScorer(field);
             if (scorer == null)
             {
                 throw new NullReferenceException("PassageScorer cannot be null");
             }
             JCG.PriorityQueue<OffsetsEnum> pq = new JCG.PriorityQueue<OffsetsEnum>();
             float[] weights = new float[terms.Length];
             // initialize postings
             for (int i = 0; i < terms.Length; i++)
             {
                 DocsAndPositionsEnum de = postings[i];
                 int pDoc;
                 if (de == EMPTY)
                 {
                     continue;
                 }
                 else if (de == null)
                 {
                     postings[i] = EMPTY; // initially
                     if (!termsEnum.SeekExact(terms[i]))
                     {
                         continue; // term not found
                     }
                     de = postings[i] = termsEnum.DocsAndPositions(null, null, DocsAndPositionsFlags.OFFSETS);
                     if (de == null)
                     {
                         // no positions available
                         throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                     }
                     pDoc = de.Advance(doc);
                 }
                 else
                 {
                     pDoc = de.DocID;
                     if (pDoc < doc)
                     {
                         pDoc = de.Advance(doc);
                     }
                 }

                 if (doc == pDoc)
                 {
                     weights[i] = scorer.Weight(contentLength, de.Freq);
                     de.NextPosition();
                     pq.Add(new OffsetsEnum(de, i));
                 }
             }

             pq.Add(new OffsetsEnum(EMPTY, int.MaxValue)); // a sentinel for termination

             JCG.PriorityQueue<Passage> passageQueue = new JCG.PriorityQueue<Passage>(n, new HighlightDocComparerAnonymousHelper1());
             Passage current = new Passage();

             while (pq.TryDequeue(out OffsetsEnum off))
             {
                 DocsAndPositionsEnum dp = off.dp;
                 int start = dp.StartOffset;
                 if (start == -1)
                 {
                     throw new ArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
                 }
                 int end = dp.EndOffset;
                 // LUCENE-5166: this hit would span the content limit... however more valid
                 // hits may exist (they are sorted by start). so we pretend like we never
                 // saw this term, it won't cause a passage to be added to passageQueue or anything.
                 Debug.Assert(EMPTY.StartOffset == int.MaxValue);
                 if (start < contentLength && end > contentLength)
                 {
                     continue;
                 }
                 if (start >= current.endOffset)
                 {
                     if (current.startOffset >= 0)
                     {
                         // finalize current
                         current.score *= scorer.Norm(current.startOffset);
                         // new sentence: first add 'current' to queue
                         if (passageQueue.Count == n && current.score < passageQueue.Peek().score)
                         {
                             current.Reset(); // can't compete, just reset it
                         }
                         else
                         {
                             passageQueue.Enqueue(current);
                             if (passageQueue.Count > n)
                             {
                                 current = passageQueue.Dequeue();
                                 current.Reset();
                             }
                             else
                             {
                                 current = new Passage();
                             }
                         }
                     }
                     // if we exceed limit, we are done
                     if (start >= contentLength)
                     {
                         Passage[] passages = passageQueue.ToArray();
                         foreach (Passage p in passages)
                         {
                             p.Sort();
                         }
                         // sort in ascending order
                         ArrayUtil.TimSort(passages, new HighlightDocComparerAnonymousHelper2());
                         return passages;
                     }
                     // advance breakiterator
                     Debug.Assert(BreakIterator.Done < 0);
                     current.startOffset = Math.Max(bi.Preceding(start + 1), 0);
                     current.endOffset = Math.Min(bi.Next(), contentLength);
                 }
                 int tf = 0;
                 while (true)
                 {
                     tf++;
                     BytesRef term = terms[off.id];
                     if (term == null)
                     {
                         // multitermquery match, pull from payload
                         term = off.dp.GetPayload();
                         Debug.Assert(term != null);
                     }
                     current.AddMatch(start, end, term);
                     if (off.pos == dp.Freq)
                     {
                         break; // removed from pq
                     }
                     else
                     {
                         off.pos++;
                         dp.NextPosition();
                         start = dp.StartOffset;
                         end = dp.EndOffset;
                     }
                     if (start >= current.endOffset || end > contentLength)
                     {
                         pq.Enqueue(off);
                         break;
                     }
                 }
                 current.score += weights[off.id] * scorer.Tf(tf, current.endOffset - current.startOffset);
             }

             // Dead code but compiler disagrees:
             Debug.Assert(false);
             return null;
         }

         /// <summary>
         /// Produces the fallback summary for a document in which nothing matched:
         /// the leading spans reported by <paramref name="bi"/>, at most
         /// <paramref name="maxPassages"/> of them. Each passage is given a
         /// <see cref="float.NaN"/> score. Subclasses may override this to
         /// supply a different default summary.
         /// </summary>
         protected virtual Passage[] GetEmptyHighlight(string fieldName, BreakIterator bi, int maxPassages)
         {
             List<Passage> result = new List<Passage>();

             // The iterator must not have been advanced yet.
             int spanStart = bi.Current;
             Debug.Assert(spanStart == 0);

             int spanEnd;
             while (result.Count < maxPassages && (spanEnd = bi.Next()) != BreakIterator.Done)
             {
                 result.Add(new Passage
                 {
                     score = float.NaN,
                     startOffset = spanStart,
                     endOffset = spanEnd
                 });
                 // the next span begins where this one ended
                 spanStart = spanEnd;
             }

             return result.ToArray();
         }

         /// <summary>
         /// Cursor over the positions of one term in the current document: the
         /// postings enum, the 1-based count of positions consumed so far, and the
         /// index of the term it belongs to. Instances order by the enum's current
         /// start offset, with ties broken by term index.
         /// </summary>
         internal class OffsetsEnum : IComparable<OffsetsEnum>
         {
             internal DocsAndPositionsEnum dp;
             internal int pos;
             internal int id;

             internal OffsetsEnum(DocsAndPositionsEnum dp, int id)
             {
                 // a new cursor counts as sitting on its first position
                 this.pos = 1;
                 this.id = id;
                 this.dp = dp;
             }

             public virtual int CompareTo(OffsetsEnum other)
             {
                 try
                 {
                     int mine = dp.StartOffset;
                     int theirs = other.dp.StartOffset;
                     return mine != theirs
                         ? mine.CompareTo(theirs)
                         : id - other.id;
                 }
                 catch (IOException e)
                 {
                     throw new Exception(e.ToString(), e);
                 }
             }
         }

         // Shared placeholder enum: marks terms with no postings and serves as the
         // termination sentinel when walking offsets.
         private static readonly DocsAndPositionsEnum EMPTY = new DocsAndPositionsEnumAnonymousHelper();

         /// <summary>
         /// A postings enum with no documents and no positions. Its offsets are
         /// reported as <see cref="int.MaxValue"/>, its frequency and cost as zero,
         /// and every document lookup answers <c>NO_MORE_DOCS</c>.
         /// </summary>
         private class DocsAndPositionsEnumAnonymousHelper : DocsAndPositionsEnum
         {
             public override int NextPosition() => 0;

             public override int StartOffset => int.MaxValue;

             public override int EndOffset => int.MaxValue;

             public override BytesRef GetPayload() => null;

             public override int Freq => 0;

             public override int DocID => NO_MORE_DOCS;

             public override int NextDoc() => NO_MORE_DOCS;

             public override int Advance(int target) => NO_MORE_DOCS;

             public override long GetCost() => 0;
         }

         /// <summary>
         /// Rewrites <paramref name="original"/> repeatedly until a rewrite hands back
         /// the very query it was given, and returns that query. The rewrite runs
         /// against an empty index reader: we don't want things like range queries
         /// that don't summarize the document.
         /// </summary>
         private static Query Rewrite(Query original)
         {
             Query settled = original;
             Query candidate = settled.Rewrite(EMPTY_INDEXREADER);
             while (candidate != settled)
             {
                 settled = candidate;
                 candidate = settled.Rewrite(EMPTY_INDEXREADER);
             }
             return settled;
         }

         /// <summary>
         /// Stored-field visitor that concatenates the string values of a fixed set of
         /// fields, one <see cref="StringBuilder"/> per field, joining multiple values
         /// with that field's separator character and never collecting more than
         /// <c>maxLength</c> characters per field.
         /// </summary>
         /// <remarks>
         /// <c>fields</c> is looked up with an ordinal binary search, so it is expected to be
         /// sorted in ordinal (UTF-16 code unit) order - NOTE(review): confirm against the
         /// caller that builds the array.
         /// </remarks>
         private class LimitedStoredFieldVisitor : StoredFieldVisitor
         {
             private readonly string[] fields;
             private readonly char[] valueSeparators;
             private readonly int maxLength;
             private readonly StringBuilder[] builders;
             // index into fields/builders of the field accepted by the last NeedsField call, or negative
             private int currentField = -1;

             /// <param name="fields">Names of the fields to collect.</param>
             /// <param name="valueSeparators">Separator per field, parallel to <paramref name="fields"/>.</param>
             /// <param name="maxLength">Maximum number of characters collected per field.</param>
             public LimitedStoredFieldVisitor(string[] fields, char[] valueSeparators, int maxLength)
             {
                 Debug.Assert(fields.Length == valueSeparators.Length);
                 this.fields = fields;
                 this.valueSeparators = valueSeparators;
                 this.maxLength = maxLength;
                 builders = new StringBuilder[fields.Length];
                 for (int i = 0; i < builders.Length; i++)
                 {
                     builders[i] = new StringBuilder();
                 }
             }

             /// <summary>
             /// Appends <paramref name="value"/> to the builder of the current field,
             /// preceded by the separator when earlier content exists, truncating so
             /// that the builder never grows beyond <c>maxLength</c>.
             /// </summary>
             public override void StringField(Index.FieldInfo fieldInfo, string value)
             {
                 Debug.Assert(currentField >= 0);
                 StringBuilder builder = builders[currentField];
                 if (builder.Length > 0 && builder.Length < maxLength)
                 {
                     builder.Append(valueSeparators[currentField]);
                 }
                 if (builder.Length + value.Length > maxLength)
                 {
                     // only the prefix that still fits
                     builder.Append(value, 0, maxLength - builder.Length);
                 }
                 else
                 {
                     builder.Append(value);
                 }
             }

             /// <summary>
             /// Accepts a field only when it is one of the requested fields and its
             /// builder still has room. When a single field is being collected and it is
             /// full, visiting is stopped altogether.
             /// </summary>
             public override Status NeedsField(Index.FieldInfo fieldInfo)
             {
                 // Field names are identifiers: search ordinally rather than with the
                 // culture-sensitive default string comparer, whose ordering can disagree
                 // with an ordinally sorted array and miss an existing entry.
                 currentField = Array.BinarySearch(fields, fieldInfo.Name, StringComparer.Ordinal);
                 if (currentField < 0)
                 {
                     return Status.NO;
                 }
                 // StringField clamps each builder to maxLength, so "full" is Length == maxLength;
                 // a strict '>' test could never succeed. Skipping a full field changes nothing
                 // in the collected text because nothing more would be appended to it.
                 if (builders[currentField].Length >= maxLength)
                 {
                     return fields.Length == 1 ? Status.STOP : Status.NO;
                 }
                 return Status.YES;
             }

             /// <summary>Returns the text collected so far for the field at index <paramref name="i"/>.</summary>
             internal string GetValue(int i)
             {
                 return builders[i].ToString();
             }

             /// <summary>Empties every builder and forgets the current field so the visitor can be reused.</summary>
             internal void Reset()
             {
                 currentField = -1;
                 for (int i = 0; i < fields.Length; i++)
                 {
                     builders[i].Length = 0;
                 }
             }
         }
     }
 }
 #endif