/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Objects;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.junit.After;
import org.junit.Before;
/**
 * Tests that highlight multi-term queries: wildcard, prefix, fuzzy, range, and span-wrapped variants.
 */
public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
final FieldType fieldType;
BaseDirectoryWrapper dir;
Analyzer indexAnalyzer;
@ParametersFactory
public static Iterable<Object[]> parameters() {
return UHTestHelper.parametersFactoryList();
}
public TestUnifiedHighlighterMTQ(FieldType fieldType) {
this.fieldType = fieldType;
}
@Before
public void doBefore() throws IOException {
dir = newDirectory();
indexAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);//whitespace, punctuation, lowercase
}
@After
public void doAfter() throws IOException {
dir.close();
}
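/** Highlights a WildcardQuery; also verifies that disabling multi-term handling, or querying the wrong field, produces no highlights. */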
public void testWildcards() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
Query query = new WildcardQuery(new Term("body", "te*"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// disable MTQ; won't highlight
highlighter.setHandleMultiTermQuery(false);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
highlighter.setHandleMultiTermQuery(true);//reset
// wrong field
BooleanQuery bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(new WildcardQuery(new Term("bogus", "te*")), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
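/** Builds a randomized highlighter that always has {@link HighlightFlag#MULTI_TERM_QUERY} enabled. */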
private UnifiedHighlighter randomUnifiedHighlighter(IndexSearcher searcher, Analyzer indexAnalyzer) {
return TestUnifiedHighlighter.randomUnifiedHighlighter(searcher, indexAnalyzer,
EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
}
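/** Highlights a PrefixQuery, here wrapped in a BoostQuery; a prefix on the wrong field is not highlighted. */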
public void testOnePrefix() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
// wrap in a BoostQuery to also show we see inside it
Query query = new BoostQuery(new PrefixQuery(new Term("body", "te")), 2.0f);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null);//default
BooleanQuery bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(new PrefixQuery(new Term("bogus", "te")), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
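/** Highlights a RegexpQuery; a regexp on the wrong field is not highlighted. */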
public void testOneRegexp() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
Query query = new RegexpQuery(new Term("body", "te.*"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null);//default
BooleanQuery bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
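/** Highlights a FuzzyQuery, including variants with a prefix length and with zero max edits; a fuzzy query on the wrong field is not highlighted. */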
public void testFuzzy() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
Query query = new FuzzyQuery(new Term("body", "tets"), 1);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// with prefix
query = new FuzzyQuery(new Term("body", "tets"), 1, 2);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// with zero max edits
query = new FuzzyQuery(new Term("body", "test"), 0, 2);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null);//default
BooleanQuery bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(new FuzzyQuery(new Term("bogus", "tets"), 1), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
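/** Highlights TermRangeQuery with open-ended, inclusive, and exclusive endpoints; exclusive bounds that exclude the term, and ranges on the wrong field, are not highlighted. */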
public void testRanges() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// null start
query = TermRangeQuery.newStringRange("body", null, "tf", true, true);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]);
// null end
query = TermRangeQuery.newStringRange("body", "ta", null, true, true);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// exact start inclusive
query = TermRangeQuery.newStringRange("body", "test", "tf", true, true);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// exact end inclusive
query = TermRangeQuery.newStringRange("body", "ta", "test", true, true);
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// exact start exclusive
BooleanQuery bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(TermRangeQuery.newStringRange("body", "test", "tf", false, true), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
// exact end exclusive
bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(TermRangeQuery.newStringRange("body", "ta", "test", true, false), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null);//default
bq = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD)
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
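/** Highlights a WildcardQuery nested in a BooleanQuery; a MUST_NOT wildcard clause is not highlighted. */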
public void testWildcardInBoolean() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
BooleanQuery query = new BooleanQuery.Builder()
.add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// must not
query = new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
.add(new WildcardQuery(new Term("bogus", "te*")), BooleanClause.Occur.MUST_NOT)
.build();
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
ir.close();
}
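/** Highlights a WildcardQuery that appears alongside a FILTER clause in a BooleanQuery. */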
public void testWildcardInFiltered() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
BooleanQuery query = new BooleanQuery.Builder()
.add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.FILTER)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
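/** Highlights a WildcardQuery wrapped in a ConstantScoreQuery. */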
public void testWildcardInConstantScore() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
ConstantScoreQuery query = new ConstantScoreQuery(new WildcardQuery(new Term("body", "te*")));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
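/** Highlights a WildcardQuery wrapped in a DisjunctionMaxQuery. */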
public void testWildcardInDisjunctionMax() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
DisjunctionMaxQuery query = new DisjunctionMaxQuery(
Collections.singleton(new WildcardQuery(new Term("body", "te*"))), 0);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
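/** Highlights a WildcardQuery wrapped in a SpanMultiTermQueryWrapper and a SpanBoostQuery. */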
public void testSpanWildcard() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
// wrap in a SpanBoostQuery to also show we see inside it
Query query = new SpanBoostQuery(
new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))), 2.0f);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
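/** Highlights a span-wrapped wildcard nested in a SpanOrQuery. */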
public void testSpanOr() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
Query query = new SpanOrQuery(new SpanQuery[]{childQuery});
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
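/** Highlights a span-wrapped wildcard nested in a SpanNearQuery. */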
public void testSpanNear() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
Query query = new SpanNearQuery(new SpanQuery[]{childQuery, childQuery}, 0, false);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
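/** Highlights the include clause of a SpanNotQuery whose exclude clause doesn't match. */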
public void testSpanNot() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
SpanQuery include = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
SpanQuery exclude = new SpanTermQuery(new Term("body", "bogus"));
Query query = new SpanNotQuery(include, exclude);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
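/** Highlights a span-wrapped wildcard nested in a SpanFirstQuery (a position-check query). */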
public void testSpanPositionCheck() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
SpanQuery childQuery = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
Query query = new SpanFirstQuery(childQuery, 1000000);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
}
/**
* Runs a query with two MTQs and confirms the formatter
* can tell which query matched which hit.
*/
public void testWhichMTQMatched() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
// use a variety of common MTQ types
BooleanQuery query = new BooleanQuery.Builder()
.add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
.add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
.add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
String snippets[] = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
// Default formatter just bolds each hit:
assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
// Now use our own formatter that also stuffs the
// matching term's text into the result:
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new PassageFormatter() {
@Override
public Object format(Passage passages[], String content) {
// Copied from DefaultPassageFormatter, but
// tweaked to include the matched term:
StringBuilder sb = new StringBuilder();
int pos = 0;
for (Passage passage : passages) {
// don't add ellipsis if it's the first one, or if it's connected.
if (passage.getStartOffset() > pos && pos > 0) {
sb.append("... ");
}
pos = passage.getStartOffset();
for (int i = 0; i < passage.getNumMatches(); i++) {
int start = passage.getMatchStarts()[i];
int end = passage.getMatchEnds()[i];
// it's possible to have overlapping terms
if (start > pos) {
sb.append(content, pos, start);
}
if (end > pos) {
sb.append("<b>");
sb.append(content, Math.max(pos, start), end);
sb.append('(');
sb.append(passage.getMatchTerms()[i].utf8ToString());
sb.append(')');
sb.append("</b>");
pos = end;
}
}
// it's possible a "term" from the analyzer could span a sentence boundary.
sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
pos = passage.getEndOffset();
}
return sb.toString();
}
};
}
};
assertEquals(1, topDocs.totalHits.value);
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
ir.close();
}
//
// All tests below were *not* ported from the PostingsHighlighter; they are new to the U.H.
//
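/** Sets a small maxLength so only the first sentence is available for highlighting. */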
public void testWithMaxLen() throws IOException {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("Alpha Bravo foo foo foo. Foo foo Alpha Bravo");//44 char long, 2 sentences
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(25);//a little past first sentence
BooleanQuery query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST)
.add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String snippets[] = highlighter.highlight("body", query, topDocs, 2);//ask for 2 but we'll only get 1
assertArrayEquals(
new String[]{"<b>Alpha</b> <b>Bravo</b> foo foo foo. "}, snippets
);
ir.close();
}
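/** Like {@link #testWithMaxLen()}, but the prefix query matches two adjacent terms (Bravo and Bravado). */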
public void testWithMaxLenAndMultipleWildcardMatches() throws IOException {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
//tests interleaving of multiple wildcard matches with the CompositePostingsEnum
//In this case the CompositePostingsEnum will have an underlying PostingsEnum that jumps from position 1 to 9 for bravo
//and a second with position 2 for Bravado
body.setStringValue("Alpha Bravo Bravado foo foo foo. Foo foo Alpha Bravo");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(32);//a little past first sentence
BooleanQuery query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST)
.add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
String snippets[] = highlighter.highlight("body", query, topDocs, 2);//ask for 2 but we'll only get 1
assertArrayEquals(
new String[]{"<b>Alpha</b> <b>Bravo</b> <b>Bravado</b> foo foo foo."}, snippets
);
ir.close();
}
public void testTokenStreamIsClosed() throws Exception {
// note: test is a derivative of testWithMaxLen()
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);
body.setStringValue("Alpha Bravo foo foo foo. Foo foo Alpha Bravo");
if (random().nextBoolean()) { // sometimes add a 2nd value (maybe matters?)
doc.add(new Field("body", "2nd value Alpha Bravo", fieldType));
}
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
// use this buggy Analyzer at highlight time
Analyzer buggyAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer buggyTokenizer = new Tokenizer() {
@Override
public boolean incrementToken() throws IOException {
throw new IOException("EXPECTED");
}
};
return new TokenStreamComponents(buggyTokenizer);
}
};
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, buggyAnalyzer);
highlighter.setHandleMultiTermQuery(true);
if (rarely()) {
highlighter.setMaxLength(25);//a little past first sentence
}
boolean hasClauses = false;
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
if (random().nextBoolean()) {
hasClauses = true;
queryBuilder.add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST);
}
if (!hasClauses || random().nextBoolean()) {
queryBuilder.add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST);
}
BooleanQuery query = queryBuilder.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
try {
String snippets[] = highlighter.highlight("body", query, topDocs, 2);
// don't even care what the results are; just want to test exception behavior
if (fieldType == UHTestHelper.reanalysisType) {
fail("Expecting EXPECTED IOException");
}
} catch (Exception e) {
if (!e.getMessage().contains("EXPECTED")) {
throw e;
}
}
ir.close();
// Now test we can get the tokenStream without it puking due to IllegalStateException for not calling close()
try (TokenStream ts = buggyAnalyzer.tokenStream("body", "anything")) {
ts.reset();// hopefully doesn't throw
// don't call incrementToken; we know it's buggy ;-)
}
}
/**
* Not empty but nothing analyzes. Ensures we address null term-vectors.
*/
public void testNothingAnalyzes() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
doc.add(new Field("body", " ", fieldType));// just a space! (thus not empty)
doc.add(newTextField("id", "id", Field.Store.YES));
iw.addDocument(doc);
doc = new Document();
doc.add(new Field("body", "something", fieldType));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
Query query = new PrefixQuery(new Term("body", "nonexistent"));
int[] docIDs = new int[1];
docIDs[0] = docID;
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
assertEquals(1, snippets.length);
assertEquals(" ", snippets[0]);
ir.close();
}
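/** Highlights a PrefixQuery whose hits span two segments. */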
public void testMultiSegment() throws Exception {
// If we incorrectly got the term vector from mis-matched global/leaf doc ID, this test may fail
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
doc.add(new Field("body", "word aberration", fieldType));
iw.addDocument(doc);
iw.commit(); // make segment
doc = new Document();
doc.add(new Field("body", "word absolve", fieldType));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
Query query = new PrefixQuery(new Term("body", "ab"));
TopDocs topDocs = searcher.search(query, 10);
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, topDocs).get("body");
Arrays.sort(snippets);
assertEquals("[word <b>aberration</b>, word <b>absolve</b>]", Arrays.toString(snippets));
ir.close();
}
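/** A query whose position-sensitive (phrase) and wildcard clauses don't match the document must not highlight anything. */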
public void testPositionSensitiveWithWildcardDoesNotHighlight() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
doc.add(new Field("body", "iterate insect ipswitch illinois indirect", fieldType));
doc.add(newTextField("id", "id", Field.Store.YES));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
PhraseQuery pq = new PhraseQuery.Builder()
.add(new Term("body", "consent"))
.add(new Term("body", "order"))
.build();
BooleanQuery query = new BooleanQuery.Builder()
.add(new WildcardQuery(new Term("body", "enforc*")), BooleanClause.Occur.MUST)
.add(pq, BooleanClause.Occur.MUST)
.build();
int[] docIds = new int[]{docID};
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
assertEquals(1, snippets.length);
assertEquals("iterate insect ipswitch illinois indirect", snippets[0]);
ir.close();
}
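/** Highlights a wildcard wrapped by a custom, non-Lucene SpanQuery implementation (see {@link MyWrapperSpanQuery}). */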
public void testCustomSpanQueryHighlighting() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
Document doc = new Document();
doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
doc.add(newTextField("id", "id", Field.Store.YES));
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
SpanMultiTermQueryWrapper<WildcardQuery> wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
BooleanQuery query = new BooleanQuery.Builder()
.add(wrappedQuery, BooleanClause.Occur.SHOULD)
.build();
int[] docIds = new int[]{docId};
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
assertEquals(1, snippets.length);
assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
ir.close();
}
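/** A SpanQuery that merely delegates to the SpanQuery it wraps. */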
private static class MyWrapperSpanQuery extends SpanQuery {
private final SpanQuery originalQuery;
private MyWrapperSpanQuery(SpanQuery originalQuery) {
this.originalQuery = Objects.requireNonNull(originalQuery);
}
@Override
public String getField() {
return originalQuery.getField();
}
@Override
public String toString(String field) {
return "(Wrapper[" + originalQuery.toString(field)+"])";
}
@Override
public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return originalQuery.createWeight(searcher, scoreMode, boost);
}
@Override
public void visit(QueryVisitor visitor) {
originalQuery.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this));
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newOriginalQuery = originalQuery.rewrite(reader);
if (newOriginalQuery != originalQuery) {
return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
}
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
}
@Override
public int hashCode() {
return originalQuery.hashCode();
}
}
// LUCENE-7717 bug, ordering of MTQ AutomatonQuery detection
public void testRussianPrefixQuery() throws IOException {
Analyzer analyzer = new StandardAnalyzer();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
String field = "title";
Document doc = new Document();
doc.add(new Field(field, "я", fieldType)); // Russian char; uses 2 UTF8 bytes
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
Query query = new PrefixQuery(new Term(field, "я"));
TopDocs topDocs = searcher.search(query, 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, analyzer);
String[] snippets = highlighter.highlight(field, query, topDocs);
assertEquals("[<b>я</b>]", Arrays.toString(snippets));
ir.close();
}
// LUCENE-7719: multi-term query highlighting over multi-byte (non-ASCII) terms
public void testMultiByteMTQ() throws IOException {
Analyzer analyzer = new KeywordAnalyzer();
try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer)) {
for (int attempt = 0; attempt < 20; attempt++) {
iw.deleteAll();
String field = "title";
String value = RandomStrings.randomUnicodeOfLength(random(), 3);
if (value.contains(UnifiedHighlighter.MULTIVAL_SEP_CHAR+"")) { // will throw things off
continue;
}
int[] valuePoints = value.codePoints().toArray();
iw.addDocument(Collections.singleton(
new Field(field, value, fieldType)));
iw.commit();
try (IndexReader ir = iw.getReader()) {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, analyzer);
highlighter.setBreakIterator(WholeBreakIterator::new);
// Test PrefixQuery
Query query = new PrefixQuery(new Term(field,
UnicodeUtil.newString(valuePoints, 0, 1)));
highlightAndAssertMatch(searcher, highlighter, query, field, value);
// Test TermRangeQuery
query = new TermRangeQuery(field,
new BytesRef(value),
new BytesRef(value),
true, true);
highlightAndAssertMatch(searcher, highlighter, query, field, value);
// Test FuzzyQuery
query = new FuzzyQuery(new Term(field, value + "Z"), 1);
highlightAndAssertMatch(searcher, highlighter, query, field, value);
if (valuePoints.length != 3) {
continue; // even though we ask RandomStrings for a String with 3 code points, it sometimes returns fewer
}
// Test WildcardQuery
query = new WildcardQuery(new Term(field,
new StringBuilder()
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[0])
.append(WildcardQuery.WILDCARD_CHAR)
.append(WildcardQuery.WILDCARD_ESCAPE).appendCodePoint(valuePoints[2]).toString()));
highlightAndAssertMatch(searcher, highlighter, query, field, value);
//TODO hmmm; how to randomly generate RegexpQuery? Low priority; we've covered the others well.
}
}
}
}
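/** Searches for the single expected hit and asserts the entire field value is highlighted. */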
private void highlightAndAssertMatch(IndexSearcher searcher, UnifiedHighlighter highlighter, Query query, String field, String fieldVal) throws IOException {
TopDocs topDocs = searcher.search(query, 1);
assertEquals(1, topDocs.totalHits.value);
String[] snippets = highlighter.highlight(field, query, topDocs);
assertEquals("[<b>"+fieldVal+"</b>]", Arrays.toString(snippets));
}
}