lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java - lucene-solr - Git at Google

 package org.apache.lucene.search.postingshighlight;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.text.BreakIterator;
 import java.util.Map;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;

 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighter extends LuceneTestCase {

   /**
    * Indexes two documents and checks that a single-term query produces, for each hit,
    * one passage with the matching term wrapped in {@code <b>} tags.
    */
   public void testBasics() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer analyzer = new MockAnalyzer(random());
     final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setMergePolicy(newLogMergePolicy());
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field whose index options include offsets.
     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Field bodyField = new Field("body", "", storedWithOffsets);
     final Document document = new Document();
     document.add(bodyField);

     // One Document/Field instance is reused; only the field value changes between adds.
     final String[] bodies = {
       "This is a test. Just a test highlighting from postings. Feel free to ignore.",
       "Highlighting the first term. Hope it works."
     };
     for (String value : bodies) {
       bodyField.setStringValue(value);
       writer.addDocument(document);
     }

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);
     final Query termQuery = new TermQuery(new Term("body", "highlighting"));
     // Hits are requested in index order; the expected snippets below are listed in add order.
     final TopDocs hits = indexSearcher.search(termQuery, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] highlighted = postingsHighlighter.highlight("body", termQuery, indexSearcher, hits);
     final String[] expected = {
       "Just a test <b>highlighting</b> from postings. ",
       "<b>Highlighting</b> the first term. "
     };
     assertEquals(expected.length, highlighted.length);
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], highlighted[i]);
     }

     reader.close();
     directory.close();
   }

   /**
    * Simple test with one-sentence documents: each snippet is expected to be the
    * whole sentence with the query term highlighted.
    */
   public void testOneSentence() throws Exception {
     Directory directory = newDirectory();
     // MockTokenizer.SIMPLE gives more natural tokenization (else "test." is a token)
     Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Document document = new Document();
     Field sentence = new Field("body", "", storedWithOffsets);
     document.add(sentence);

     // First document.
     sentence.setStringValue("This is a test.");
     writer.addDocument(document);
     // Second document, reusing the same Document and Field instances.
     sentence.setStringValue("Test a one sentence document.");
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);
     Query testQuery = new TermQuery(new Term("body", "test"));
     TopDocs hits = indexSearcher.search(testQuery, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     String[] highlighted = new PostingsHighlighter().highlight("body", testQuery, indexSearcher, hits);
     assertEquals(2, highlighted.length);
     assertEquals("This is a <b>test</b>.", highlighted[0]);
     assertEquals("<b>Test</b> a one sentence document.", highlighted[1]);

     reader.close();
     directory.close();
   }

   /**
    * Simple test with multiple values that make a result longer than maxLength.
    * The same sentence is added three times to the "body" field of one document and the
    * highlighter is built with a maximum length of 40; the resulting snippet must be exactly
    * 40 characters long plus the length of one pair of highlight tags.
    */
   public void testMaxLengthWithMultivalue() throws Exception {
     final int maxLength = 40;

     Directory dir = newDirectory();
     // use simpleanalyzer for more natural tokenization (else "test." is a token)
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Document doc = new Document();

     // Three values for the same field name make "body" multivalued.
     for (int i = 0; i < 3; i++) {
       doc.add(new Field("body", "This is a multivalued field", offsetsType));
     }

     iw.addDocument(doc);

     IndexReader ir = iw.getReader();
     iw.close();

     IndexSearcher searcher = newSearcher(ir);
     PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
     Query query = new TermQuery(new Term("body", "field"));
     TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
     String[] snippets = highlighter.highlight("body", query, searcher, topDocs);
     assertEquals(1, snippets.length);
     // assertEquals (rather than assertTrue on ==) reports the actual length when this fails.
     assertEquals("Snippet should have maximum 40 characters plus the pre and post tags",
         maxLength + "<b></b>".length(), snippets[0].length());

     ir.close();
     dir.close();
   }

   /**
    * Highlights two fields ("body" and "title") in one call using a boolean query with
    * one optional term per field, and checks the per-field snippets for both hits.
    */
   public void testMultipleFields() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Field bodyField = new Field("body", "", storedWithOffsets);
     final Field titleField = new Field("title", "", storedWithOffsets);
     final Document document = new Document();
     document.add(bodyField);
     document.add(titleField);

     // First document.
     bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
     titleField.setStringValue("I am hoping for the best.");
     writer.addDocument(document);

     // Second document, reusing the same Document and Field instances.
     bodyField.setStringValue("Highlighting the first term. Hope it works.");
     titleField.setStringValue("But best may not be good enough.");
     writer.addDocument(document);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);

     // Either clause may match: "highlighting" in body, "best" in title.
     final BooleanQuery eitherField = new BooleanQuery();
     eitherField.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
     eitherField.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD);

     final TopDocs hits = indexSearcher.search(eitherField, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] fieldNames = new String [] { "body", "title" };
     final Map<String,String[]> byField = postingsHighlighter.highlightFields(fieldNames, eitherField, indexSearcher, hits);
     assertEquals(2, byField.size());

     final String[] bodySnippets = byField.get("body");
     final String[] titleSnippets = byField.get("title");
     assertEquals("Just a test <b>highlighting</b> from postings. ", bodySnippets[0]);
     assertEquals("<b>Highlighting</b> the first term. ", bodySnippets[1]);
     assertEquals("I am hoping for the <b>best</b>.", titleSnippets[0]);
     assertEquals("But <b>best</b> may not be good enough.", titleSnippets[1]);

     reader.close();
     directory.close();
   }

   /**
    * Runs a boolean query with three optional terms on the same field and checks that
    * every matching term inside the selected passage is highlighted.
    */
   public void testMultipleTerms() throws Exception {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Field bodyField = new Field("body", "", storedWithOffsets);
     Document document = new Document();
     document.add(bodyField);

     // Two documents share one Document/Field instance; only the value changes.
     String[] bodies = {
       "This is a test. Just a test highlighting from postings. Feel free to ignore.",
       "Highlighting the first term. Hope it works."
     };
     for (String value : bodies) {
       bodyField.setStringValue(value);
       writer.addDocument(document);
     }

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);

     // Build the query clause by clause, in the same order as the term list.
     BooleanQuery anyTerm = new BooleanQuery();
     for (String text : new String[] { "highlighting", "just", "first" }) {
       anyTerm.add(new TermQuery(new Term("body", text)), BooleanClause.Occur.SHOULD);
     }

     TopDocs hits = indexSearcher.search(anyTerm, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     String[] highlighted = postingsHighlighter.highlight("body", anyTerm, indexSearcher, hits);
     assertEquals(2, highlighted.length);
     assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", highlighted[0]);
     assertEquals("<b>Highlighting</b> the <b>first</b> term. ", highlighted[1]);

     reader.close();
     directory.close();
   }

   /**
    * Requests up to two passages per document and checks the combined snippet of each hit.
    * For the second document the expected snippet uses "..." in place of the sentence
    * that was left out between the two passages.
    */
   public void testMultiplePassages() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document document = new Document();
     final Field bodyField = new Field("body", "", storedWithOffsets);
     document.add(bodyField);

     bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
     writer.addDocument(document);
     bodyField.setStringValue("This test is another test. Not a good sentence. Test test test test.");
     writer.addDocument(document);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);
     final Query testQuery = new TermQuery(new Term("body", "test"));
     final TopDocs hits = indexSearcher.search(testQuery, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     final int maxPassages = 2;
     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] highlighted = postingsHighlighter.highlight("body", testQuery, indexSearcher, hits, maxPassages);
     final String[] expected = {
       "This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ",
       "This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>."
     };
     assertEquals(expected.length, highlighted.length);
     for (int i = 0; i < expected.length; i++) {
       assertEquals(expected[i], highlighted[i]);
     }

     reader.close();
     directory.close();
   }

   /**
    * Verifies that highlighting a field that was not indexed with offsets is rejected
    * with an {@link IllegalArgumentException}. Two fields are tried: "body", indexed with
    * positions but no offsets, and "title", a {@link StringField}.
    */
   public void testUserFailedToIndexOffsets() throws Exception {
     Directory directory = newDirectory();
     Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Deliberately stops at positions: no offsets are indexed for "body".
     FieldType positionsOnly = new FieldType(TextField.TYPE_STORED);
     positionsOnly.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
     Field bodyField = new Field("body", "", positionsOnly);
     Field titleField = new StringField("title", "", Field.Store.YES);
     Document document = new Document();
     document.add(bodyField);
     document.add(titleField);

     bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
     titleField.setStringValue("test");
     writer.addDocument(document);

     bodyField.setStringValue("This test is another test. Not a good sentence. Test test test test.");
     titleField.setStringValue("test");
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);
     PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     Query bodyQuery = new TermQuery(new Term("body", "test"));
     TopDocs hits = indexSearcher.search(bodyQuery, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     // Highlighting the positions-only field must be refused.
     try {
       postingsHighlighter.highlight("body", bodyQuery, indexSearcher, hits, 2);
       fail("did not hit expected exception");
     } catch (IllegalArgumentException expected) {
       // expected
     }

     // Same expectation for the StringField, using the hits found above.
     Query titleQuery = new TermQuery(new Term("title", "test"));
     try {
       postingsHighlighter.highlight("title", titleQuery, indexSearcher, hits, 2);
       fail("did not hit expected exception");
     } catch (IllegalArgumentException expected) {
       // expected
     }

     reader.close();
     directory.close();
   }

   /**
    * Indexes a long blurb and runs the phrase query "buddhist origins"; the snippet
    * must contain both phrase terms highlighted next to each other.
    */
   public void testBuddhism() throws Exception {
     final String blurb = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
                   "range of academic disciplines published over the last forty years. With a new introduction " +
                   "by the editor, this collection is a unique and unrivalled research resource for both " +
                   "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
                   "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
                   "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
                   "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
                   "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
                   "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";

     final Directory directory = newDirectory();
     final Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, simpleAnalyzer);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document singleDoc = new Document();
     singleDoc.add(new Field("body", blurb, storedWithOffsets));
     writer.addDocument(singleDoc);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);

     // Phrase: "buddhist" immediately followed by "origins".
     final PhraseQuery phrase = new PhraseQuery();
     phrase.add(new Term("body", "buddhist"));
     phrase.add(new Term("body", "origins"));

     final TopDocs hits = indexSearcher.search(phrase, 10);
     assertEquals(1, hits.totalHits);

     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] highlighted = postingsHighlighter.highlight("body", phrase, indexSearcher, hits, 2);
     assertEquals(1, highlighted.length);
     final boolean phraseHighlighted = highlighted[0].contains("<b>Buddhist</b> <b>origins</b>");
     assertTrue(phraseHighlighted);

     reader.close();
     directory.close();
   }

   /**
    * Indexes a blurb containing non-ASCII punctuation and runs the phrase query
    * "curious george"; the snippet must not contain the malformed fragment
    * {@code <b>Curious</b>Curious}.
    */
   public void testCuriousGeorge() throws Exception {
     final String blurb = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
                   "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
                   "popular primate and painted in the original watercolor and charcoal style. " +
                   "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";

     final Directory directory = newDirectory();
     final Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, simpleAnalyzer);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document singleDoc = new Document();
     singleDoc.add(new Field("body", blurb, storedWithOffsets));
     writer.addDocument(singleDoc);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);

     final PhraseQuery phrase = new PhraseQuery();
     phrase.add(new Term("body", "curious"));
     phrase.add(new Term("body", "george"));

     final TopDocs hits = indexSearcher.search(phrase, 10);
     assertEquals(1, hits.totalHits);

     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] highlighted = postingsHighlighter.highlight("body", phrase, indexSearcher, hits, 2);
     assertEquals(1, highlighted.length);
     final boolean malformed = highlighted[0].contains("<b>Curious</b>Curious");
     assertFalse(malformed);

     reader.close();
     directory.close();
   }

   /**
    * Reads the first line of the classpath resource {@code CambridgeMA.utf8}, indexes it as
    * a single document and runs a boolean query with three optional terms. With the
    * highlighter's maximum length set to {@code Integer.MAX_VALUE-1}, the snippet must
    * contain both {@code <b>Square</b>} and {@code <b>Porter</b>}.
    */
   public void testCambridgeMA() throws Exception {
     BufferedReader r = new BufferedReader(new InputStreamReader(
                      this.getClass().getResourceAsStream("CambridgeMA.utf8"), "UTF-8"));
     final String text;
     try {
       text = r.readLine();
     } finally {
       // Close the reader even when readLine() throws, so the stream is not leaked.
       r.close();
     }
     // readLine() returns null for an empty resource; report that explicitly.
     assertNotNull("CambridgeMA.utf8 is empty", text);

     Directory dir = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
     FieldType positionsType = new FieldType(TextField.TYPE_STORED);
     positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Field body = new Field("body", text, positionsType);
     Document document = new Document();
     document.add(body);
     iw.addDocument(document);
     IndexReader ir = iw.getReader();
     iw.close();

     IndexSearcher searcher = newSearcher(ir);
     BooleanQuery query = new BooleanQuery();
     query.add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD);
     query.add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD);
     query.add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD);
     TopDocs topDocs = searcher.search(query, 10);
     assertEquals(1, topDocs.totalHits);

     PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
     String[] snippets = highlighter.highlight("body", query, searcher, topDocs, 2);
     assertEquals(1, snippets.length);
     assertTrue(snippets[0].contains("<b>Square</b>"));
     assertTrue(snippets[0].contains("<b>Porter</b>"));

     ir.close();
     dir.close();
   }

   /**
    * Indexes one document in which several sentences contain "test" and requests two
    * passages. The expected snippet consists of the first sentence and the final sentence
    * (seven occurrences of the term), separated by "..."; the other sentence containing
    * the term is not part of it.
    */
   public void testPassageRanking() throws Exception {
     Directory directory = newDirectory();
     Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Document document = new Document();
     Field bodyField = new Field("body", "", storedWithOffsets);
     document.add(bodyField);

     bodyField.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);
     Query testQuery = new TermQuery(new Term("body", "test"));
     TopDocs hits = indexSearcher.search(testQuery, null, 10, Sort.INDEXORDER);
     assertEquals(1, hits.totalHits);

     int maxPassages = 2;
     String[] highlighted = new PostingsHighlighter().highlight("body", testQuery, indexSearcher, hits, maxPassages);
     assertEquals(1, highlighted.length);
     String expected = "This is a <b>test</b>.  ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.";
     assertEquals(expected, highlighted[0]);

     reader.close();
     directory.close();
   }

   /**
    * Checks that a term which occurs in the query only under a MUST_NOT clause ("both")
    * is not highlighted in the snippet.
    */
   public void testBooleanMustNot() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, simpleAnalyzer);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document singleDoc = new Document();
     singleDoc.add(new Field("body", "This sentence has both terms.  This sentence has only terms.", storedWithOffsets));
     writer.addDocument(singleDoc);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);

     // Inner query: only a prohibited clause on "both".
     final BooleanQuery prohibited = new BooleanQuery();
     prohibited.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);

     // Outer query: optional "terms" plus the optional inner query.
     final BooleanQuery outer = new BooleanQuery();
     outer.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
     outer.add(prohibited, BooleanClause.Occur.SHOULD);

     final TopDocs hits = indexSearcher.search(outer, 10);
     assertEquals(1, hits.totalHits);

     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
     final String[] highlighted = postingsHighlighter.highlight("body", outer, indexSearcher, hits, 2);
     assertEquals(1, highlighted.length);
     final boolean prohibitedTermHighlighted = highlighted[0].contains("<b>both</b>");
     assertFalse(prohibitedTermHighlighted);

     reader.close();
     directory.close();
   }

   /**
    * Overrides {@code getBreakIterator} to return a {@code WholeBreakIterator} and checks
    * that the snippet is the complete field text with every occurrence of the query term
    * highlighted.
    */
   public void testHighlightAllText() throws Exception {
     Directory directory = newDirectory();
     Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Document document = new Document();
     Field bodyField = new Field("body", "", storedWithOffsets);
     document.add(bodyField);

     bodyField.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);

     // Highlighter variant that uses a WholeBreakIterator for every field.
     PostingsHighlighter wholeTextHighlighter = new PostingsHighlighter(10000) {
       @Override
       protected BreakIterator getBreakIterator(String field) {
         BreakIterator whole = new WholeBreakIterator();
         return whole;
       }
     };

     Query testQuery = new TermQuery(new Term("body", "test"));
     TopDocs hits = indexSearcher.search(testQuery, null, 10, Sort.INDEXORDER);
     assertEquals(1, hits.totalHits);

     String[] highlighted = wholeTextHighlighter.highlight("body", testQuery, indexSearcher, hits, 2);
     assertEquals(1, highlighted.length);
     String expected = "This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.";
     assertEquals(expected, highlighted[0]);

     reader.close();
     directory.close();
   }

   /**
    * Highlights explicitly supplied document IDs (taken from the search hits) through the
    * {@code highlightFields} overload that accepts doc IDs and per-field passage counts.
    */
   public void testSpecificDocIDs() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer analyzer = new MockAnalyzer(random());
     final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setMergePolicy(newLogMergePolicy());
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document document = new Document();
     final Field bodyField = new Field("body", "", storedWithOffsets);
     document.add(bodyField);

     bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
     writer.addDocument(document);
     bodyField.setStringValue("Highlighting the first term. Hope it works.");
     writer.addDocument(document);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);
     final Query termQuery = new TermQuery(new Term("body", "highlighting"));
     final TopDocs hits = indexSearcher.search(termQuery, null, 10, Sort.INDEXORDER);
     assertEquals(2, hits.totalHits);

     // Collect the doc IDs of the two hits, in hit order.
     final ScoreDoc[] scoreDocs = hits.scoreDocs;
     final int[] docIDs = { scoreDocs[0].doc, scoreDocs[1].doc };

     final String[] fieldNames = new String[] {"body"};
     final int[] maxPassagesPerField = new int[] { 1 };
     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] highlighted =
         postingsHighlighter.highlightFields(fieldNames, termQuery, indexSearcher, docIDs, maxPassagesPerField).get("body");
     assertEquals(2, highlighted.length);
     assertEquals("Just a test <b>highlighting</b> from postings. ", highlighted[0]);
     assertEquals("<b>Highlighting</b> the first term. ", highlighted[1]);

     reader.close();
     directory.close();
   }

   /**
    * Indexes a field that is not stored and supplies its text to the highlighter through an
    * overridden {@code loadFieldValues}. Together with a {@code WholeBreakIterator}, the
    * snippet must be the full text with every occurrence of the query term highlighted.
    */
   public void testCustomFieldValueSource() throws Exception {
     Directory directory = newDirectory();
     Analyzer simpleAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, simpleAnalyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // TYPE_NOT_STORED: the field value is supplied by the loadFieldValues override below.
     FieldType unstoredWithOffsets = new FieldType(TextField.TYPE_NOT_STORED);
     unstoredWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
     Document document = new Document();
     document.add(new Field("body", text, unstoredWithOffsets));
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);

     PostingsHighlighter customSourceHighlighter = new PostingsHighlighter(10000) {
       @Override
       protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
         // This test only ever asks for one field of one document.
         assert fields.length == 1;
         assert docids.length == 1;
         return new String[][] { { text } };
       }

       @Override
       protected BreakIterator getBreakIterator(String field) {
         return new WholeBreakIterator();
       }
     };

     Query testQuery = new TermQuery(new Term("body", "test"));
     TopDocs hits = indexSearcher.search(testQuery, null, 10, Sort.INDEXORDER);
     assertEquals(1, hits.totalHits);

     String[] highlighted = customSourceHighlighter.highlight("body", testQuery, indexSearcher, hits, 2);
     assertEquals(1, highlighted.length);
     String expected = "This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.";
     assertEquals(expected, highlighted[0]);

     reader.close();
     directory.close();
   }

   /**
    * Make sure the highlighter returns the first N sentences when the query term does not
    * occur in the field (here N is 2, the requested number of passages).
    */
   public void testEmptyHighlights() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer analyzer = new MockAnalyzer(random());
     final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setMergePolicy(newLogMergePolicy());
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document document = new Document();
     document.add(new Field("body", "test this is.  another sentence this test has.  far away is that planet.", storedWithOffsets));
     writer.addDocument(document);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);

     // "highlighting" does not appear anywhere in the indexed text.
     final Query absentTerm = new TermQuery(new Term("body", "highlighting"));
     final String[] fieldNames = new String[] {"body"};
     final int[] docIDs = new int[] {0};
     final int[] maxPassagesPerField = new int[] { 2 };

     final PostingsHighlighter postingsHighlighter = new PostingsHighlighter();
     final String[] highlighted =
         postingsHighlighter.highlightFields(fieldNames, absentTerm, indexSearcher, docIDs, maxPassagesPerField).get("body");
     assertEquals(1, highlighted.length);
     assertEquals("test this is.  another sentence this test has.  ", highlighted[0]);

     reader.close();
     directory.close();
   }

   /**
    * Make sure we can customize what the highlighter returns when there are no hits:
    * with {@code getEmptyHighlight} overridden to return no passages, the snippet is null.
    */
   public void testCustomEmptyHighlights() throws Exception {
     Directory directory = newDirectory();
     Analyzer analyzer = new MockAnalyzer(random());
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Document document = new Document();
     document.add(new Field("body", "test this is.  another sentence this test has.  far away is that planet.", storedWithOffsets));
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher indexSearcher = newSearcher(reader);

     // Highlighter variant that supplies no fallback passages.
     PostingsHighlighter noFallbackHighlighter = new PostingsHighlighter() {
       @Override
       public Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
         Passage[] none = new Passage[0];
         return none;
       }
     };

     // "highlighting" does not appear anywhere in the indexed text.
     Query absentTerm = new TermQuery(new Term("body", "highlighting"));
     String[] fieldNames = new String[] {"body"};
     int[] docIDs = new int[] {0};
     int[] maxPassagesPerField = new int[] { 2 };

     String[] highlighted =
         noFallbackHighlighter.highlightFields(fieldNames, absentTerm, indexSearcher, docIDs, maxPassagesPerField).get("body");
     assertEquals(1, highlighted.length);
     assertNull(highlighted[0]);

     reader.close();
     directory.close();
   }

   /**
    * Make sure the highlighter returns the whole text when there are no hits and the
    * break iterator is a {@code WholeBreakIterator}.
    */
   public void testEmptyHighlightsWhole() throws Exception {
     final Directory directory = newDirectory();
     final Analyzer analyzer = new MockAnalyzer(random());
     final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setMergePolicy(newLogMergePolicy());
     final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     final FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     final Document document = new Document();
     document.add(new Field("body", "test this is.  another sentence this test has.  far away is that planet.", storedWithOffsets));
     writer.addDocument(document);

     final IndexReader reader = writer.getReader();
     writer.close();

     final IndexSearcher indexSearcher = newSearcher(reader);

     // Highlighter variant that uses a WholeBreakIterator for every field.
     final PostingsHighlighter wholeTextHighlighter = new PostingsHighlighter(10000) {
       @Override
       protected BreakIterator getBreakIterator(String field) {
         BreakIterator whole = new WholeBreakIterator();
         return whole;
       }
     };

     // "highlighting" does not appear anywhere in the indexed text.
     final Query absentTerm = new TermQuery(new Term("body", "highlighting"));
     final String[] fieldNames = new String[] {"body"};
     final int[] docIDs = new int[] {0};
     final int[] maxPassagesPerField = new int[] { 2 };

     final String[] highlighted =
         wholeTextHighlighter.highlightFields(fieldNames, absentTerm, indexSearcher, docIDs, maxPassagesPerField).get("body");
     assertEquals(1, highlighted.length);
     assertEquals("test this is.  another sentence this test has.  far away is that planet.", highlighted[0]);

     reader.close();
     directory.close();
   }

   /** Make sure asking for highlights on a field that was never
    *  indexed ("bogus") yields a single null snippet. */
   public void testFieldIsMissing() throws Exception {
     Directory directory = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

     // Only a "body" field is ever indexed; "bogus" does not exist.
     Document document = new Document();
     document.add(new Field("body", "test this is.  another sentence this test has.  far away is that planet.", storedWithOffsets));
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter();

     final String missingField = "bogus";
     Query query = new TermQuery(new Term(missingField, "highlighting"));
     String[] fields = new String[] {missingField};
     int[] docIDs = new int[] {0};
     int[] maxPassages = new int[] { 2 };
     String[] snippets = highlighter.highlightFields(fields, query, searcher, docIDs, maxPassages).get(missingField);

     assertEquals(1, snippets.length);
     assertNull(snippets[0]);

     reader.close();
     directory.close();
   }

   /** Make sure a body consisting only of spaces comes back
    *  unchanged as the snippet when the query has no hits. */
   public void testFieldIsJustSpace() throws Exception {
     Directory directory = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

     // Document under test: whitespace-only body plus an "id" field used to find it again.
     Document blankDoc = new Document();
     blankDoc.add(new Field("body", "   ", storedWithOffsets));
     blankDoc.add(new Field("id", "id", storedWithOffsets));
     writer.addDocument(blankDoc);

     // A second document with a real body value.
     Document otherDoc = new Document();
     otherDoc.add(new Field("body", "something", storedWithOffsets));
     writer.addDocument(otherDoc);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter();

     // Look up the internal doc id of the whitespace-only document via its "id" field.
     Query byId = new TermQuery(new Term("id", "id"));
     int targetDoc = searcher.search(byId, 1).scoreDocs[0].doc;

     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[] { targetDoc };
     String[] fields = new String[] {"body"};
     int[] maxPassages = new int[] { 2 };
     String[] snippets = highlighter.highlightFields(fields, query, searcher, docIDs, maxPassages).get("body");

     assertEquals(1, snippets.length);
     assertEquals("   ", snippets[0]);

     reader.close();
     directory.close();
   }

   /** Make sure a body that is the empty string produces a
    *  null snippet when the query has no hits. */
   public void testFieldIsEmptyString() throws Exception {
     Directory directory = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

     // Document under test: empty body plus an "id" field used to find it again.
     Document emptyDoc = new Document();
     emptyDoc.add(new Field("body", "", storedWithOffsets));
     emptyDoc.add(new Field("id", "id", storedWithOffsets));
     writer.addDocument(emptyDoc);

     // A second document with a real body value.
     Document otherDoc = new Document();
     otherDoc.add(new Field("body", "something", storedWithOffsets));
     writer.addDocument(otherDoc);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter();

     // Look up the internal doc id of the empty-body document via its "id" field.
     Query byId = new TermQuery(new Term("id", "id"));
     int targetDoc = searcher.search(byId, 1).scoreDocs[0].doc;

     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[] { targetDoc };
     String[] fields = new String[] {"body"};
     int[] maxPassages = new int[] { 2 };
     String[] snippets = highlighter.highlightFields(fields, query, searcher, docIDs, maxPassages).get("body");

     assertEquals(1, snippets.length);
     assertNull(snippets[0]);

     reader.close();
     directory.close();
   }

   /** Indexes {@code atLeast(100)} documents, committing at random points,
    *  then checks that every hit for "answer" gets a snippet equal to its
    *  own body text with the term wrapped in bold tags. */
   public void testMultipleDocs() throws Exception {
     Directory directory = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

     final String evenSuffix = " some more terms";

     int numDocs = atLeast(100);
     int n = 0;
     while (n < numDocs) {
       // Even-numbered documents get a longer body than odd-numbered ones.
       String text = "the answer is " + n + (n % 2 == 0 ? evenSuffix : "");

       Document document = new Document();
       document.add(new Field("body", text, storedWithOffsets));
       document.add(newStringField("id", "" + n, Field.Store.YES));
       writer.addDocument(document);

       // Occasionally commit between documents.
       if (random().nextInt(10) == 2) {
         writer.commit();
       }
       n++;
     }

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter();
     Query query = new TermQuery(new Term("body", "answer"));
     TopDocs hits = searcher.search(query, numDocs);
     assertEquals(numDocs, hits.totalHits);

     String[] snippets = highlighter.highlight("body", query, searcher, hits);
     assertEquals(numDocs, snippets.length);

     for (int rank = 0; rank < numDocs; rank++) {
       // Recover the original document number from the stored "id" field
       // and rebuild the text the highlighter should have produced for it.
       StoredDocument stored = searcher.doc(hits.scoreDocs[rank].doc);
       int id = Integer.parseInt(stored.get("id"));
       String expected = "the <b>answer</b> is " + id + (id % 2 == 0 ? evenSuffix : "");
       assertEquals(expected, snippets[rank]);
     }

     reader.close();
     directory.close();
   }

   /** Highlights two fields holding the same text in one call, with a
    *  different maxPassages value per field (1 for title, 2 for body),
    *  and checks that each snippet is the expected one. */
   public void testMultipleSnippetSizes() throws Exception {
     final String text = "This is a test. Just a test highlighting from postings. Feel free to ignore.";

     Directory directory = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

     Field bodyField = new Field("body", "", storedWithOffsets);
     Field titleField = new Field("title", "", storedWithOffsets);
     Document document = new Document();
     document.add(bodyField);
     document.add(titleField);

     // Both fields carry identical content.
     bodyField.setStringValue(text);
     titleField.setStringValue(text);
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter();

     BooleanQuery query = new BooleanQuery();
     query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
     query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);

     // maxPassages is parallel to fields: title -> 1, body -> 2.
     String[] fields = new String[] { "title", "body" };
     int[] docIDs = new int[] { 0 };
     int[] maxPassages = new int[] { 1, 2 };
     Map<String,String[]> byField = highlighter.highlightFields(fields, query, searcher, docIDs, maxPassages);

     String titleSnippet = byField.get("title")[0];
     String bodySnippet = byField.get("body")[0];
     assertEquals("This is a <b>test</b>. ", titleSnippet);
     assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodySnippet);

     reader.close();
     directory.close();
   }

   /** Installs a {@code DefaultPassageFormatter} whose last constructor
    *  argument is {@code true} and checks that the snippet content comes
    *  back entity-encoded while the bold highlight tags stay literal. */
   public void testEncode() throws Exception {
     Directory directory = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

     Field bodyField = new Field("body", "", storedWithOffsets);
     Document document = new Document();
     document.add(bodyField);

     // The content deliberately contains markup (<i>...</i>) of its own.
     bodyField.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter() {
       @Override
       protected PassageFormatter getFormatter(String field) {
         return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
       }
     };

     Query query = new TermQuery(new Term("body", "highlighting"));
     TopDocs hits = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, hits.totalHits);

     String[] snippets = highlighter.highlight("body", query, searcher, hits);
     assertEquals(1, snippets.length);

     String expected = "Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;";
     assertEquals(expected, snippets[0]);

     reader.close();
     directory.close();
   }

   /** Overrides the multi-valued separator with U+2029 and checks that the
    *  snippet for a two-valued field ends at the first value, followed by
    *  that separator character. */
   public void testGapSeparator() throws Exception {
     Directory directory = newDirectory();
     // use simpleanalyzer for more natural tokenization (else "test." is a token)
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
     config.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

     // Stored text field that also indexes offsets in the postings.
     FieldType storedWithOffsets = new FieldType(TextField.TYPE_STORED);
     storedWithOffsets.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
     Document document = new Document();

     // Two values under the same field name make "body" multi-valued.
     Field firstValue = new Field("body", "", storedWithOffsets);
     firstValue.setStringValue("This is a multivalued field");
     document.add(firstValue);

     Field secondValue = new Field("body", "", storedWithOffsets);
     secondValue.setStringValue("This is something different");
     document.add(secondValue);

     writer.addDocument(document);

     IndexReader reader = writer.getReader();
     writer.close();

     IndexSearcher searcher = newSearcher(reader);
     PostingsHighlighter highlighter = new PostingsHighlighter() {
       @Override
       protected char getMultiValuedSeparator(String field) {
         assert field.equals("body");
         return '\u2029';
       }
     };

     Query query = new TermQuery(new Term("body", "field"));
     TopDocs hits = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, hits.totalHits);

     String[] snippets = highlighter.highlight("body", query, searcher, hits);
     assertEquals(1, snippets.length);
     assertEquals("This is a multivalued <b>field</b>\u2029", snippets[0]);

     reader.close();
     directory.close();
   }
 }