solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.highlight;

 import java.io.IOException;
 import java.text.BreakIterator;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.function.Predicate;

 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
 import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
 import org.apache.lucene.search.uhighlight.LengthGoalBreakIterator;
 import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.PassageScorer;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
 import org.apache.lucene.search.uhighlight.WholeBreakIterator;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.core.PluginInfo;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SolrReturnFields;
 import org.apache.solr.util.RTimerTree;
 import org.apache.solr.util.plugin.PluginInfoInitialized;

 /**
  * Highlighter impl that uses {@link UnifiedHighlighter}
  * <p>
  * Example configuration with default values:
  * <pre class="prettyprint">
  * &lt;requestHandler name="/select" class="solr.SearchHandler"&gt;
  * &lt;lst name="defaults"&gt;
  * &lt;str name="hl.method"&gt;unified&lt;/str&gt;
  * &lt;int name="hl.snippets"&gt;1&lt;/int&gt;
  * &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
  * &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
  * &lt;str name="hl.simple.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
  * &lt;str name="hl.simple.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
  * &lt;str name="hl.tag.ellipsis"&gt;(internal/unspecified)&lt;/str&gt;
  * &lt;bool name="hl.defaultSummary"&gt;false&lt;/bool&gt;
  * &lt;str name="hl.encoder"&gt;simple&lt;/str&gt;
  * &lt;float name="hl.score.k1"&gt;1.2&lt;/float&gt;
  * &lt;float name="hl.score.b"&gt;0.75&lt;/float&gt;
  * &lt;float name="hl.score.pivot"&gt;87&lt;/float&gt;
  * &lt;str name="hl.bs.language"&gt;&lt;/str&gt;
  * &lt;str name="hl.bs.country"&gt;&lt;/str&gt;
  * &lt;str name="hl.bs.variant"&gt;&lt;/str&gt;
  * &lt;str name="hl.bs.type"&gt;SENTENCE&lt;/str&gt;
  * &lt;int name="hl.maxAnalyzedChars"&gt;51200&lt;/int&gt;
  * &lt;bool name="hl.highlightMultiTerm"&gt;true&lt;/bool&gt;
  * &lt;bool name="hl.usePhraseHighlighter"&gt;true&lt;/bool&gt;
  * &lt;int name="hl.cacheFieldValCharsThreshold"&gt;524288&lt;/int&gt;
  * &lt;str name="hl.offsetSource"&gt;&lt;/str&gt;
  * &lt;bool name="hl.weightMatches"&gt;true&lt;/bool&gt;
  * &lt;/lst&gt;
  * &lt;/requestHandler&gt;
  * </pre>
  * <p>
  * Notes:
  * <ul>
  * <li>hl.q (string) can specify the query
  * <li>hl.fl (string) specifies the field list.
  * <li>hl.snippets (int) specifies how many snippets to return.
  * <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
  * <li>hl.tag.post (string) specifies text which appears after a highlighted term.
  * <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer hl.tag.pre)
  * <li>hl.simple.post (string) specifies text which appears after a highlighted term. (prefer hl.tag.post)
  * <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is to retain each
  * value in a list without joining them.
  * <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading text.
  * <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
  * <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
  * <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
  * <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
  * <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHARACTER, SEPARATOR, WHOLE]
  * <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
  * <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
  * <li>hl.bs.variant (string) specifies variant code for BreakIterator. default is empty string (root locale)
  * <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document for any one field.
  * <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries at some cost. default is true
  * <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
  * <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached. default is 524288 (1MB in 2 byte chars)
  * <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will use what's available if not specified
  * <li>hl.weightMatches (bool) enables Lucene Weight Matches mode</li>
  * </ul>
  *
  * @lucene.experimental
  */
 public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {

   // Default text placed between passages when hl.tag.ellipsis is not given; encodeSnippets splits on it.
   protected static final String SNIPPET_SEPARATOR = "\u0000";
   // Shared empty array reported for a field whose snippet is null.
   private static final String[] ZERO_LEN_STR_ARRAY = new String[0];

   // Intentionally empty: nothing is read from the plugin info here.
   @Override
   public void init(PluginInfo info) {
   }

   /**
    * Highlights the given documents for the fields selected by the request.
    *
    * @param docs          the documents to highlight
    * @param query         the query whose matches are highlighted
    * @param req           the current request; supplies the params and the searcher
    * @param defaultFields passed on to {@code getHighlightFields} together with the query and request
    * @return a list keyed by unique key, each entry mapping a field name to its snippets, or
    *         {@code null} when highlighting is not enabled for the request
    * @throws IOException propagated from key lookup or from the underlying highlighter
    */
   @Override
   public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
     final SolrParams params = req.getParams();

     // if highlighting isn't enabled, then why call doHighlighting?
     if (!isHighlightingEnabled(params)) {
       return null;
     }

     int[] docIDs = toDocIDs(docs);

     // fetch the unique keys
     String[] keys = getUniqueKeys(req.getSearcher(), docIDs);

     // query-time parameters
     String[] fieldNames = getHighlightFields(query, req, defaultFields);
     if (fieldNames.length == 0) {
       // Lucene's UnifiedHighlighter rejects an empty field list with an IllegalArgumentException.
       // Report an empty summary per document instead of failing the whole request.
       return encodeSnippets(keys, fieldNames, java.util.Collections.<String, String[]>emptyMap());
     }

     // per-field passage limit (hl.snippets), defaulting to 1
     int[] maxPassages = new int[fieldNames.length];
     for (int i = 0; i < fieldNames.length; i++) {
       maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
     }

     UnifiedHighlighter highlighter = getHighlighter(req);
     Map<String, String[]> snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages);
     return encodeSnippets(keys, fieldNames, snippets);
   }

   /**
    * Factory hook for the Lucene {@link UnifiedHighlighter} used by this class. Subclasses may override it
    * to hand back their own subclass of {@link SolrExtendedUnifiedHighlighter}.
    *
    * @param req the current request, passed to the highlighter's constructor
    * @return a new highlighter instance for this request
    */
   protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
     final UnifiedHighlighter highlighter = new SolrExtendedUnifiedHighlighter(req);
     return highlighter;
   }

   /**
    * Packs the highlighter output into a named list: one entry per document key, each holding a
    * field-name to snippet-array mapping.
    *
    * @param keys       the document unique keys
    * @param fieldNames field names to highlight, in output order
    * @param snippets   map from field name to an array with one (possibly null) snippet string per document
    * @return encoded namedlist of summaries
    */
   protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String, String[]> snippets) {
     final NamedList<Object> result = new SimpleOrderedMap<>();
     int docIdx = 0;
     for (String key : keys) {
       final NamedList<Object> perDoc = new SimpleOrderedMap<>();
       for (String field : fieldNames) {
         final String joined = snippets.get(field)[docIdx];
         // A null snippet yields an empty array (TODO reuse logic of DefaultSolrHighlighter.alternateField);
         // otherwise split on the special separator char to recover the individual passages.
         final String[] passages = (joined == null) ? ZERO_LEN_STR_ARRAY : joined.split(SNIPPET_SEPARATOR);
         perDoc.add(field, passages);
       }
       result.add(key, perDoc);
       docIdx++;
     }
     return result;
   }

   /**
    * Copies the doc ids of a Solr {@code DocList} into an {@code int[]}, in iteration order.
    *
    * @param docs the doc list to read
    * @return an array of {@code docs.size()} doc ids
    * @throws AssertionError if the iterator yields fewer or more entries than {@code docs.size()}
    */
   protected int[] toDocIDs(DocList docs) {
     final int[] ids = new int[docs.size()];
     final DocIterator it = docs.iterator();
     int filled = 0;
     while (filled < ids.length) {
       if (!it.hasNext()) {
         throw new AssertionError();
       }
       ids[filled++] = it.nextDoc();
     }
     // the iterator must be exhausted exactly when the array is full
     if (it.hasNext()) {
       throw new AssertionError();
     }
     return ids;
   }

   /**
    * Looks up the printable unique key of each document so the results can be keyed by it.
    *
    * @param searcher the searcher used to fetch the documents and the schema
    * @param docIDs   the doc ids to resolve
    * @return one key per doc id; all entries are {@code null} when the schema has no unique key field
    * @throws IOException if fetching a document fails
    */
   protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
     final IndexSchema schema = searcher.getSchema();
     final SchemaField keyField = schema.getUniqueKeyField();
     final String[] keys = new String[docIDs.length];
     if (keyField == null) {
       return keys; // nothing to look up; entries stay null
     }

     // fetch only the unique key field of each document
     final SolrReturnFields onlyKey = new SolrReturnFields(keyField.getName(), null);
     int slot = 0;
     for (int docId : docIDs) {
       final SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, onlyKey);
       keys[slot++] = schema.printableUniqueKey(doc);
     }
     return keys;
   }

   /**
    * From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}.
    */
   protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter {
     // Field matcher that accepts every field name; returned by getFieldMatcher when requireFieldMatch is off.
     protected final static Predicate<String> NOT_REQUIRED_FIELD_MATCH_PREDICATE = s -> true;
     // Request parameters; the per-field settings in this class are read from here.
     protected final SolrParams params;

     // Schema taken from the request in the constructor.
     protected final IndexSchema schema;
     // "loadFieldValues" sub-timer; resumed and paused around loadFieldValues calls.
     protected final RTimerTree loadFieldValuesTimer;

     /**
      * Builds a highlighter from the request's searcher and index analyzer, applies the max-length and
      * cache-threshold params, and sets up a paused {@code loadFieldValues} sub-timer.
      *
      * @param req the current request
      */
     public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) {
       super(req.getSearcher(), req.getSchema().getIndexAnalyzer());
       this.schema = req.getSchema();
       this.params = req.getParams();

       final int maxChars = params.getInt(HighlightParams.MAX_CHARS, DEFAULT_MAX_CHARS);
       this.setMaxLength(maxChars);
       final int cacheThreshold =
           params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD);
       this.setCacheFieldValCharsThreshold(cacheThreshold);

       // The request timer may be null if not used in a search context; use a throwaway timer then,
       // since null checks are annoying.
       final RTimerTree parentTimer = req.getRequestTimer() != null ? req.getRequestTimer() : new RTimerTree();
       loadFieldValuesTimer = parentTimer.sub("loadFieldValues"); // we assume a new timer, state of STARTED
       loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED.
     }

     /**
      * Returns the offset source named by the per-field offset-source param (matched case-insensitively
      * by upper-casing), or the superclass's choice when the param is absent.
      */
     @Override
     protected OffsetSource getOffsetSource(String field) {
       final String requested = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE);
       if (requested == null) {
         return super.getOffsetSource(field);
       }
       return OffsetSource.valueOf(requested.toUpperCase(Locale.ROOT));
     }

     // optimization for Solr: the SolrIndexSearcher keeps a FieldInfos on-hand, so ask it directly
     @Override
     protected FieldInfo getFieldInfo(String field) {
       final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcher;
       return solrSearcher.getFieldInfos().fieldInfo(field);
     }

     /**
      * Maps the per-field default-summary param (default {@code false}) onto the highlighter's
      * "no highlight" passage count.
      */
     @Override
     public int getMaxNoHighlightPassages(String field) {
       // -1 signifies return first hl.snippets passages worth of the content; 0 will return null
       return params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false) ? -1 : 0;
     }

     /**
      * Builds the passage formatter for a field from the pre/post tag, ellipsis and encoder params.
      * The {@code hl.simple.*} tags act as fallbacks for the {@code hl.tag.*} ones.
      */
     @Override
     protected PassageFormatter getFormatter(String fieldName) {
       final String simplePre = params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>");
       final String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE, simplePre);

       final String simplePost = params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>");
       final String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, simplePost);

       // without an explicit ellipsis, passages are joined by the internal separator
       final String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR);
       final boolean escapeHtml = "html".equals(params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple"));
       return new DefaultPassageFormatter(preTag, postTag, ellipsis, escapeHtml);
     }

     /**
      * Builds the passage scorer for a field from the k1, b and pivot params
      * (defaults 1.2, 0.75 and 87).
      */
     @Override
     protected PassageScorer getScorer(String fieldName) {
       return new PassageScorer(
           params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f),
           params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f),
           params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f));
     }

     /**
      * Chooses the break iterator for a field from the fragsize and boundary-scanner params:
      * a whole-text iterator, a custom-separator iterator, or a locale-based one, optionally wrapped
      * in a length-goal iterator.
      */
     @Override
     protected BreakIterator getBreakIterator(String field) {
       // Use a default fragsize the same as the regex Fragmenter (original Highlighter) since we're
       //  both likely shooting for sentence-like patterns.
       final int fragsize =
           params.getFieldInt(field, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
       final String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
       if (fragsize == 0 || "WHOLE".equals(type)) { // 0 is special value; no fragmenting
         return new WholeBreakIterator();
       }

       final BreakIterator baseBI;
       if (!"SEPARATOR".equals(type)) {
         final Locale locale = parseLocale(
             params.getFieldParam(field, HighlightParams.BS_LANGUAGE),
             params.getFieldParam(field, HighlightParams.BS_COUNTRY),
             params.getFieldParam(field, HighlightParams.BS_VARIANT));
         baseBI = parseBreakIterator(type, locale);
       } else {
         final String separator = params.getFieldParam(field, HighlightParams.BS_SEP);
         baseBI = new CustomSeparatorBreakIterator(parseBiSepChar(separator));
       }

       if (fragsize <= 1) { // no real minimum size
         return baseBI;
       }

       final float fragalign = params.getFieldFloat(field, HighlightParams.FRAGALIGNRATIO, 0.5f);
       final boolean sizeIsMinimum = params.getFieldBool(field, HighlightParams.FRAGSIZEISMINIMUM, true);
       return sizeIsMinimum
           ? LengthGoalBreakIterator.createMinLength(baseBI, fragsize, fragalign)
           : LengthGoalBreakIterator.createClosestToLength(baseBI, fragsize, fragalign);
     }

     /**
      * Validates the custom separator for {@link CustomSeparatorBreakIterator} and returns it as a char.
      *
      * @param sepChar the raw param value; must be exactly one char long
      * @return the single separator char
      * @throws SolrException (BAD_REQUEST) if the value is missing or not exactly one char
      */
     protected char parseBiSepChar(String sepChar) {
       if (sepChar == null) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, HighlightParams.BS_SEP + " not passed");
       }
       if (sepChar.length() == 1) {
         return sepChar.charAt(0);
       }
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
           HighlightParams.BS_SEP + " must be a single char but got: '" + sepChar + "'");
     }

     /**
      * Creates the JDK break iterator for a boundary type and locale.
      *
      * @param type   one of SENTENCE, LINE, WORD or CHARACTER (exact match); {@code null} means SENTENCE
      * @param locale the locale the iterator is created for
      * @return a new break iterator instance
      * @throws IllegalArgumentException if the type is not one of the supported names
      */
     protected BreakIterator parseBreakIterator(String type, Locale locale) {
       if (type == null) {
         return BreakIterator.getSentenceInstance(locale);
       }
       switch (type) {
         case "SENTENCE":
           return BreakIterator.getSentenceInstance(locale);
         case "LINE":
           return BreakIterator.getLineInstance(locale);
         case "WORD":
           return BreakIterator.getWordInstance(locale);
         case "CHARACTER":
           return BreakIterator.getCharacterInstance(locale);
         default:
           throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
       }
     }

     /**
      * Builds a locale from a language+country+variant spec.
      *
      * @param language language code; required whenever country or variant is given
      * @param country  country code; required whenever variant is given
      * @param variant  variant code, may be {@code null}
      * @return {@link Locale#ROOT} when all three are {@code null}, otherwise the matching locale
      * @throws IllegalArgumentException if a more specific part is given without the part it depends on
      */
     protected Locale parseLocale(String language, String country, String variant) {
       if (language == null) {
         if (country == null && variant == null) {
           return Locale.ROOT;
         }
         throw new IllegalArgumentException("language is required if country or variant is specified");
       }
       if (country == null) {
         if (variant != null) {
           throw new IllegalArgumentException("To specify variant, country is required");
         }
         return new Locale(language);
       }
       return (variant == null)
           ? new Locale(language, country)
           : new Locale(language, country, variant);
     }

     /**
      * Delegates to the superclass while the {@code loadFieldValues} timer is running, so the time
      * spent loading field values is recorded.
      */
     @Override
     protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
         int cacheCharsThreshold) throws IOException {
       // Time loading field values.  It can be an expensive part of highlighting.
       loadFieldValuesTimer.resume();
       try {
         final List<CharSequence[]> values = super.loadFieldValues(fields, docIter, cacheCharsThreshold);
         return values;
       } finally {
         // note: doesn't need to be "stopped"; pause is fine.
         loadFieldValuesTimer.pause();
       }
     }

     /**
      * Assembles the highlight flags for a field. PASSAGE_RELEVANCY_OVER_SPEED is always set; the
      * multi-term, phrase and weight-matches flags follow their per-field params (all default true).
      */
     @Override
     protected Set<HighlightFlag> getFlags(String field) {
       final boolean multiTerm = params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true);
       final boolean phrases = params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true);
       final boolean weightMatches = params.getFieldBool(field, HighlightParams.WEIGHT_MATCHES, true);

       final Set<HighlightFlag> flags = EnumSet.of(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
       if (multiTerm) {
         flags.add(HighlightFlag.MULTI_TERM_QUERY);
       }
       if (phrases) {
         flags.add(HighlightFlag.PHRASES);
       }
       // weight-matches mode is only turned on when phrase and multi-term highlighting are both on
       if (weightMatches && phrases && multiTerm) {
         flags.add(HighlightFlag.WEIGHT_MATCHES);
       }
       return flags;
     }

     /**
      * Returns the predicate deciding which query fields may highlight the given field: only the field
      * itself when the require-field-match param is true, otherwise any field.
      */
     @Override
     protected Predicate<String> getFieldMatcher(String field) {
       // TODO define hl.queryFieldPattern as a more advanced alternative to hl.requireFieldMatch.

       // note that the UH at Lucene level default to effectively "true"
       final boolean requireFieldMatch = params.getFieldBool(field, HighlightParams.FIELD_MATCH, false);
       if (!requireFieldMatch) {
         return NOT_REQUIRED_FIELD_MATCH_PREDICATE;
       }
       return field::equals;
     }
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.highlight;

	import java.io.IOException;
	import java.text.BreakIterator;
	import java.util.EnumSet;
	import java.util.List;
	import java.util.Locale;
	import java.util.Map;
	import java.util.Set;
	import java.util.function.Predicate;

	import org.apache.lucene.index.FieldInfo;
	import org.apache.lucene.search.DocIdSetIterator;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
	import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
	import org.apache.lucene.search.uhighlight.LengthGoalBreakIterator;
	import org.apache.lucene.search.uhighlight.PassageFormatter;
	import org.apache.lucene.search.uhighlight.PassageScorer;
	import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
	import org.apache.lucene.search.uhighlight.WholeBreakIterator;
	import org.apache.solr.common.SolrDocument;
	import org.apache.solr.common.SolrException;
	import org.apache.solr.common.params.HighlightParams;
	import org.apache.solr.common.params.SolrParams;
	import org.apache.solr.common.util.NamedList;
	import org.apache.solr.common.util.SimpleOrderedMap;
	import org.apache.solr.core.PluginInfo;
	import org.apache.solr.request.SolrQueryRequest;
	import org.apache.solr.schema.IndexSchema;
	import org.apache.solr.schema.SchemaField;
	import org.apache.solr.search.DocIterator;
	import org.apache.solr.search.DocList;
	import org.apache.solr.search.SolrIndexSearcher;
	import org.apache.solr.search.SolrReturnFields;
	import org.apache.solr.util.RTimerTree;
	import org.apache.solr.util.plugin.PluginInfoInitialized;

	/**
	* Highlighter impl that uses {@link UnifiedHighlighter}
	* <p>
	* Example configuration with default values:
	* <pre class="prettyprint">
	* &lt;requestHandler name="/select" class="solr.SearchHandler"&gt;
	* &lt;lst name="defaults"&gt;
	* &lt;str name="hl.method"&gt;unified&lt;/str&gt;
	* &lt;int name="hl.snippets"&gt;1&lt;/int&gt;
	* &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
	* &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
	* &lt;str name="hl.simple.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
	* &lt;str name="hl.simple.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
	* &lt;str name="hl.tag.ellipsis"&gt;(internal/unspecified)&lt;/str&gt;
	* &lt;bool name="hl.defaultSummary"&gt;false&lt;/bool&gt;
	* &lt;str name="hl.encoder"&gt;simple&lt;/str&gt;
	* &lt;float name="hl.score.k1"&gt;1.2&lt;/float&gt;
	* &lt;float name="hl.score.b"&gt;0.75&lt;/float&gt;
	* &lt;float name="hl.score.pivot"&gt;87&lt;/float&gt;
	* &lt;str name="hl.bs.language"&gt;&lt;/str&gt;
	* &lt;str name="hl.bs.country"&gt;&lt;/str&gt;
	* &lt;str name="hl.bs.variant"&gt;&lt;/str&gt;
	* &lt;str name="hl.bs.type"&gt;SENTENCE&lt;/str&gt;
	* &lt;int name="hl.maxAnalyzedChars"&gt;51200&lt;/int&gt;
	* &lt;bool name="hl.highlightMultiTerm"&gt;true&lt;/bool&gt;
	* &lt;bool name="hl.usePhraseHighlighter"&gt;true&lt;/bool&gt;
	* &lt;int name="hl.cacheFieldValCharsThreshold"&gt;524288&lt;/int&gt;
	* &lt;str name="hl.offsetSource"&gt;&lt;/str&gt;
	* &lt;bool name="hl.weightMatches"&gt;true&lt;/bool&gt;
	* &lt;/lst&gt;
	* &lt;/requestHandler&gt;
	* </pre>
	* <p>
	* Notes:
	* <ul>
	* <li>hl.q (string) can specify the query
	* <li>hl.fl (string) specifies the field list.
	* <li>hl.snippets (int) specifies how many snippets to return.
	* <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
	* <li>hl.tag.post (string) specifies text which appears after a highlighted term.
	* <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer hl.tag.pre)
	* <li>hl.simple.post (string) specifies text which appears after a highlighted term. (prefer hl.tag.post)
	* <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is to retain each
	* value in a list without joining them.
	* <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading text.
	* <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
	* <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
	* <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
	* <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
	* <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHARACTER, SEPARATOR, WHOLE]
	* <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
	* <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
	* <li>hl.bs.variant (string) specifies variant code for BreakIterator. default is empty string (root locale)
	* <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document for any one field.
	* <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries at some cost. default is true
	* <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
	* <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached. default is 524288 (1MB in 2 byte chars)
	* <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will use what's available if not specified
	* <li>hl.weightMatches (bool) enables Lucene Weight Matches mode</li>
	* </ul>
	*
	* @lucene.experimental
	*/
	public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {

	// Default text placed between passages when hl.tag.ellipsis is not given; encodeSnippets splits on it.
	protected static final String SNIPPET_SEPARATOR = "\u0000";
	// Shared empty array reported for a field whose snippet is null.
	private static final String[] ZERO_LEN_STR_ARRAY = new String[0];

	// Intentionally empty: nothing is read from the plugin info here.
	@Override
	public void init(PluginInfo info) {
	}

	/**
	 * Highlights the given documents for the fields selected by the request.
	 *
	 * @param docs          the documents to highlight
	 * @param query         the query whose matches are highlighted
	 * @param req           the current request; supplies the params and the searcher
	 * @param defaultFields passed on to {@code getHighlightFields} together with the query and request
	 * @return a list keyed by unique key, each entry mapping a field name to its snippets, or
	 *         {@code null} when highlighting is not enabled for the request
	 * @throws IOException propagated from key lookup or from the underlying highlighter
	 */
	@Override
	public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
	  final SolrParams params = req.getParams();

	  // if highlighting isn't enabled, then why call doHighlighting?
	  if (!isHighlightingEnabled(params)) {
	    return null;
	  }

	  int[] docIDs = toDocIDs(docs);

	  // fetch the unique keys
	  String[] keys = getUniqueKeys(req.getSearcher(), docIDs);

	  // query-time parameters
	  String[] fieldNames = getHighlightFields(query, req, defaultFields);
	  if (fieldNames.length == 0) {
	    // Lucene's UnifiedHighlighter rejects an empty field list with an IllegalArgumentException.
	    // Report an empty summary per document instead of failing the whole request.
	    return encodeSnippets(keys, fieldNames, java.util.Collections.<String, String[]>emptyMap());
	  }

	  // per-field passage limit (hl.snippets), defaulting to 1
	  int[] maxPassages = new int[fieldNames.length];
	  for (int i = 0; i < fieldNames.length; i++) {
	    maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
	  }

	  UnifiedHighlighter highlighter = getHighlighter(req);
	  Map<String, String[]> snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages);
	  return encodeSnippets(keys, fieldNames, snippets);
	}

	/**
	 * Factory hook for the Lucene {@link UnifiedHighlighter} used by this class. Subclasses may override it
	 * to hand back their own subclass of {@link SolrExtendedUnifiedHighlighter}.
	 *
	 * @param req the current request, passed to the highlighter's constructor
	 * @return a new highlighter instance for this request
	 */
	protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
	  final UnifiedHighlighter highlighter = new SolrExtendedUnifiedHighlighter(req);
	  return highlighter;
	}

	/**
	 * Packs the highlighter output into a named list: one entry per document key, each holding a
	 * field-name to snippet-array mapping.
	 *
	 * @param keys       the document unique keys
	 * @param fieldNames field names to highlight, in output order
	 * @param snippets   map from field name to an array with one (possibly null) snippet string per document
	 * @return encoded namedlist of summaries
	 */
	protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String, String[]> snippets) {
	  final NamedList<Object> result = new SimpleOrderedMap<>();
	  int docIdx = 0;
	  for (String key : keys) {
	    final NamedList<Object> perDoc = new SimpleOrderedMap<>();
	    for (String field : fieldNames) {
	      final String joined = snippets.get(field)[docIdx];
	      // A null snippet yields an empty array (TODO reuse logic of DefaultSolrHighlighter.alternateField);
	      // otherwise split on the special separator char to recover the individual passages.
	      final String[] passages = (joined == null) ? ZERO_LEN_STR_ARRAY : joined.split(SNIPPET_SEPARATOR);
	      perDoc.add(field, passages);
	    }
	    result.add(key, perDoc);
	    docIdx++;
	  }
	  return result;
	}

	/**
	 * Copies the doc ids of a Solr {@code DocList} into an {@code int[]}, in iteration order.
	 *
	 * @param docs the doc list to read
	 * @return an array of {@code docs.size()} doc ids
	 * @throws AssertionError if the iterator yields fewer or more entries than {@code docs.size()}
	 */
	protected int[] toDocIDs(DocList docs) {
	  final int[] ids = new int[docs.size()];
	  final DocIterator it = docs.iterator();
	  int filled = 0;
	  while (filled < ids.length) {
	    if (!it.hasNext()) {
	      throw new AssertionError();
	    }
	    ids[filled++] = it.nextDoc();
	  }
	  // the iterator must be exhausted exactly when the array is full
	  if (it.hasNext()) {
	    throw new AssertionError();
	  }
	  return ids;
	}

	/**
	 * Looks up the printable unique key of each document so the results can be keyed by it.
	 *
	 * @param searcher the searcher used to fetch the documents and the schema
	 * @param docIDs   the doc ids to resolve
	 * @return one key per doc id; all entries are {@code null} when the schema has no unique key field
	 * @throws IOException if fetching a document fails
	 */
	protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
	  final IndexSchema schema = searcher.getSchema();
	  final SchemaField keyField = schema.getUniqueKeyField();
	  final String[] keys = new String[docIDs.length];
	  if (keyField == null) {
	    return keys; // nothing to look up; entries stay null
	  }

	  // fetch only the unique key field of each document
	  final SolrReturnFields onlyKey = new SolrReturnFields(keyField.getName(), null);
	  int slot = 0;
	  for (int docId : docIDs) {
	    final SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, onlyKey);
	    keys[slot++] = schema.printableUniqueKey(doc);
	  }
	  return keys;
	}

	/**
	* From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}.
	*/
	protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter {
	// Field matcher that accepts every field name.
	protected final static Predicate<String> NOT_REQUIRED_FIELD_MATCH_PREDICATE = s -> true;
	// Request parameters; the per-field settings in this class are read from here.
	protected final SolrParams params;

	// Schema taken from the request in the constructor.
	protected final IndexSchema schema;
	// "loadFieldValues" sub-timer created (and immediately paused) in the constructor.
	protected final RTimerTree loadFieldValuesTimer;

	/**
	 * Builds a highlighter from the request's searcher and index analyzer, applies the max-length and
	 * cache-threshold params, and sets up a paused {@code loadFieldValues} sub-timer.
	 *
	 * @param req the current request
	 */
	public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) {
	  super(req.getSearcher(), req.getSchema().getIndexAnalyzer());
	  this.schema = req.getSchema();
	  this.params = req.getParams();

	  final int maxChars = params.getInt(HighlightParams.MAX_CHARS, DEFAULT_MAX_CHARS);
	  this.setMaxLength(maxChars);
	  final int cacheThreshold =
	      params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD);
	  this.setCacheFieldValCharsThreshold(cacheThreshold);

	  // The request timer may be null if not used in a search context; use a throwaway timer then,
	  // since null checks are annoying.
	  final RTimerTree parentTimer = req.getRequestTimer() != null ? req.getRequestTimer() : new RTimerTree();
	  loadFieldValuesTimer = parentTimer.sub("loadFieldValues"); // we assume a new timer, state of STARTED
	  loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED.
	}

	/**
	 * Returns the offset source named by the per-field offset-source param (matched case-insensitively
	 * by upper-casing), or the superclass's choice when the param is absent.
	 */
	@Override
	protected OffsetSource getOffsetSource(String field) {
	  final String requested = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE);
	  if (requested == null) {
	    return super.getOffsetSource(field);
	  }
	  return OffsetSource.valueOf(requested.toUpperCase(Locale.ROOT));
	}

	// optimization for Solr: the SolrIndexSearcher keeps a FieldInfos on-hand, so ask it directly
	@Override
	protected FieldInfo getFieldInfo(String field) {
	  final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcher;
	  return solrSearcher.getFieldInfos().fieldInfo(field);
	}

	/**
	 * Maps the per-field default-summary param (default {@code false}) onto the highlighter's
	 * "no highlight" passage count.
	 */
	@Override
	public int getMaxNoHighlightPassages(String field) {
	  // -1 signifies return first hl.snippets passages worth of the content; 0 will return null
	  return params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false) ? -1 : 0;
	}

	/**
	 * Builds the passage formatter for a field from the pre/post tag, ellipsis and encoder params.
	 * The {@code hl.simple.*} tags act as fallbacks for the {@code hl.tag.*} ones.
	 */
	@Override
	protected PassageFormatter getFormatter(String fieldName) {
	  final String simplePre = params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>");
	  final String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE, simplePre);

	  final String simplePost = params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>");
	  final String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, simplePost);

	  // without an explicit ellipsis, passages are joined by the internal separator
	  final String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR);
	  final boolean escapeHtml = "html".equals(params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple"));
	  return new DefaultPassageFormatter(preTag, postTag, ellipsis, escapeHtml);
	}

	/**
	 * Builds the passage scorer for a field from the k1, b and pivot params
	 * (defaults 1.2, 0.75 and 87).
	 */
	@Override
	protected PassageScorer getScorer(String fieldName) {
	  return new PassageScorer(
	      params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f),
	      params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f),
	      params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f));
	}

	@Override
	protected BreakIterator getBreakIterator(String field) {
	// Use a default fragsize the same as the regex Fragmenter (original Highlighter) since we're
	// both likely shooting for sentence-like patterns.
	int fragsize = params.getFieldInt(field, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
	String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
	if (fragsize == 0 \|\| "WHOLE".equals(type)) { // 0 is special value; no fragmenting
	return new WholeBreakIterator();
	}

	BreakIterator baseBI;
	if ("SEPARATOR".equals(type)) {
	char customSep = parseBiSepChar(params.getFieldParam(field, HighlightParams.BS_SEP));
	baseBI = new CustomSeparatorBreakIterator(customSep);
	} else {
	String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE);
	String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY);
	String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
	Locale locale = parseLocale(language, country, variant);
	baseBI = parseBreakIterator(type, locale);
	}

	if (fragsize <= 1) { // no real minimum size
	return baseBI;
	}

	float fragalign = params.getFieldFloat(field, HighlightParams.FRAGALIGNRATIO, 0.5f);
	if (params.getFieldBool(field, HighlightParams.FRAGSIZEISMINIMUM, true)) {
	return LengthGoalBreakIterator.createMinLength(baseBI, fragsize, fragalign);
	}
	return LengthGoalBreakIterator.createClosestToLength(baseBI, fragsize, fragalign);
	}

	/**
	 * Extracts the custom separator char for {@link CustomSeparatorBreakIterator}.
	 *
	 * @param sepChar raw parameter value; must be exactly one char long
	 * @return the single char contained in {@code sepChar}
	 * @throws SolrException with {@code BAD_REQUEST} when the value is null or
	 *         is not exactly one char long
	 */
	protected char parseBiSepChar(String sepChar) {
	  if (sepChar != null && sepChar.length() == 1) {
	    return sepChar.charAt(0);
	  }
	  // Distinguish "missing" from "wrong length" in the message reported to the client.
	  final String problem = (sepChar == null)
	      ? " not passed"
	      : " must be a single char but got: '" + sepChar + "'";
	  throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, HighlightParams.BS_SEP + problem);
	}

	/**
	* parse a break iterator type for the specified locale
	*/
	protected BreakIterator parseBreakIterator(String type, Locale locale) {
	if (type == null \|\| "SENTENCE".equals(type)) {
	return BreakIterator.getSentenceInstance(locale);
	} else if ("LINE".equals(type)) {
	return BreakIterator.getLineInstance(locale);
	} else if ("WORD".equals(type)) {
	return BreakIterator.getWordInstance(locale);
	} else if ("CHARACTER".equals(type)) {
	return BreakIterator.getCharacterInstance(locale);
	} else {
	throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
	}
	}

	/**
	 * Builds a {@link Locale} from a language + country + variant specification.
	 *
	 * @param language language code; required whenever country or variant is given
	 * @param country country code; required whenever variant is given
	 * @param variant variant code, may be null
	 * @return {@link Locale#ROOT} when all three arguments are null, otherwise a
	 *         locale built from the non-null parts
	 * @throws IllegalArgumentException when country or variant is given without a
	 *         language, or variant is given without a country
	 */
	protected Locale parseLocale(String language, String country, String variant) {
	  if (language == null) {
	    if (country == null && variant == null) {
	      return Locale.ROOT;
	    }
	    throw new IllegalArgumentException("language is required if country or variant is specified");
	  }

	  if (country == null) {
	    if (variant != null) {
	      throw new IllegalArgumentException("To specify variant, country is required");
	    }
	    return new Locale(language);
	  }

	  return (variant == null)
	      ? new Locale(language, country)
	      : new Locale(language, country, variant);
	}

	/**
	 * Delegates to the superclass to load field values, while accumulating the
	 * time spent in {@code loadFieldValuesTimer}. Loading field values can be an
	 * expensive part of highlighting, hence the timing.
	 *
	 * @return whatever the superclass implementation returns
	 * @throws IOException propagated from the superclass implementation
	 */
	@Override
	protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
	    int cacheCharsThreshold) throws IOException {
	  loadFieldValuesTimer.resume();
	  try {
	    final List<CharSequence[]> loaded = super.loadFieldValues(fields, docIter, cacheCharsThreshold);
	    return loaded;
	  } finally {
	    // Pausing is sufficient here; the timer does not need to be "stopped".
	    loadFieldValuesTimer.pause();
	  }
	}

	/**
	 * Computes the set of highlight flags for a field from request parameters.
	 * <p>
	 * {@code MULTI_TERM_QUERY} and {@code PHRASES} are each enabled by their own
	 * parameter (both default to true). {@code PASSAGE_RELEVANCY_OVER_SPEED} is
	 * always set. {@code WEIGHT_MATCHES} is set only when its parameter is true
	 * (the default) and both of the first two flags are enabled.
	 *
	 * @param field name of the field being highlighted
	 * @return a mutable {@link EnumSet} holding the enabled flags
	 */
	@Override
	protected Set<HighlightFlag> getFlags(String field) {
	  final boolean multiTerm = params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true);
	  final boolean phrases = params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true);
	  // Weight matching additionally requires both phrase and multi-term highlighting.
	  final boolean weightMatches =
	      params.getFieldBool(field, HighlightParams.WEIGHT_MATCHES, true) && phrases && multiTerm;

	  final Set<HighlightFlag> enabled = EnumSet.of(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
	  if (multiTerm) {
	    enabled.add(HighlightFlag.MULTI_TERM_QUERY);
	  }
	  if (phrases) {
	    enabled.add(HighlightFlag.PHRASES);
	  }
	  if (weightMatches) {
	    enabled.add(HighlightFlag.WEIGHT_MATCHES);
	  }
	  return enabled;
	}

	/**
	 * Returns the predicate deciding which query fields may contribute highlights
	 * to {@code field}.
	 *
	 * @param field name of the field being highlighted
	 * @return {@code NOT_REQUIRED_FIELD_MATCH_PREDICATE} unless the
	 *         {@code HighlightParams.FIELD_MATCH} flag is set for the field, in
	 *         which case a predicate matching only {@code field} itself
	 */
	@Override
	protected Predicate<String> getFieldMatcher(String field) {
	  // TODO define hl.queryFieldPattern as a more advanced alternative to hl.requireFieldMatch.

	  // Solr defaults this flag to false, whereas the UH at Lucene level
	  // defaults to effectively "true".
	  final boolean requireFieldMatch = params.getFieldBool(field, HighlightParams.FIELD_MATCH, false);
	  if (!requireFieldMatch) {
	    return NOT_REQUIRED_FIELD_MATCH_PREDICATE;
	  }
	  return field::equals; // requireFieldMatch
	}
	}

	}