// blob: 8228ecc34511c9a358197f9d5b75ed4784b1af4e
#if FEATURE_BREAKITERATOR
using ICU4N.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Diagnostics;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Search.PostingsHighlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// LUCENENET specific - These are the original tests from Lucene. They are only here as proof that we
/// can customize the <see cref="ICUPostingsHighlighter"/> to act like the PostingsHighlighter in Lucene,
/// which has slightly different default behavior than that of ICU because Lucene uses
/// the RuleBasedBreakIterator from the JDK, not that of ICU4J.
/// <para/>
/// These tests use a mock <see cref="PostingsHighlighter"/>, which is backed by an ICU
/// <see cref="ICU4N.Text.RuleBasedBreakIterator"/> that is customized a bit to act (sort of)
/// like the one in the JDK. However, this customized implementation is not a logical default for
/// the <see cref="ICUPostingsHighlighter"/>.
/// </summary>
[SuppressCodecs("MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x")]
public class TestPostingsHighlighter : LuceneTestCase
{
/// <summary>
/// Indexes two documents that both contain the term "highlighting" and checks that,
/// for each hit, the snippet is the sentence around the match with the term wrapped
/// in bold tags.
/// </summary>
[Test]
public void TestBasics()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    // The same Document/Field instances are reused; only the field value changes.
    bodyField.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    writer.AddDocument(document);
    bodyField.SetStringValue("Highlighting the first term. Hope it works.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits);
    assertEquals(2, highlighted.Length);
    assertEquals("Just a test <b>highlighting</b> from postings. ", highlighted[0]);
    assertEquals("<b>Highlighting</b> the first term. ", highlighted[1]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Runs the length-limited helper on text with two "TEST" occurrences and expects the
/// first one to be highlighted while the output is cut off partway through the second.
/// </summary>
[Test]
public void TestFormatWithMatchExceedingContentLength2()
{
    const string content = "123 TEST 01234 TEST";

    string[] result = formatWithMatchExceedingContentLength(content);

    assertEquals(1, result.Length);
    assertEquals("123 <b>TEST</b> 01234 TE", result[0]);
}
/// <summary>
/// Runs the length-limited helper on text whose two "TEST" occurrences both sit at the
/// end, and expects truncated text with no highlight tags.
/// </summary>
[Test]
public void TestFormatWithMatchExceedingContentLength3()
{
    const string content = "123 5678 01234 TEST TEST";

    string[] result = formatWithMatchExceedingContentLength(content);

    assertEquals(1, result.Length);
    assertEquals("123 5678 01234 TE", result[0]);
}
/// <summary>
/// Runs the length-limited helper on text whose only "TEST" occurrence is the last
/// word, and expects truncated text with no highlight tags.
/// </summary>
[Test]
public void TestFormatWithMatchExceedingContentLength()
{
    const string content = "123 5678 01234 TEST";

    string[] result = formatWithMatchExceedingContentLength(content);

    assertEquals(1, result.Length);
    // LUCENE-5166: no snippet
    assertEquals("123 5678 01234 TE", result[0]);
}
/// <summary>
/// Indexes <paramref name="bodyText"/> as a single "body" document, searches it for the
/// term "test" and highlights the hit with a <see cref="PostingsHighlighter"/> whose
/// maximum length is 17.
/// </summary>
/// <param name="bodyText">Text to index into the "body" field.</param>
/// <returns>The snippets returned by the highlighter for the "body" field.</returns>
private String[] formatWithMatchExceedingContentLength(String bodyText)
{
    const int lengthLimit = 17;

    Analyzer mockAnalyzer = new MockAnalyzer(Random);
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer);
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var offsetsType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var document = new Document();
    document.Add(new Field("body", bodyText, offsetsType));
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    Query termQuery = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(termQuery, null, 10, Sort.INDEXORDER);
    assertEquals(1, hits.TotalHits);

    var highlighter = new PostingsHighlighter(lengthLimit);
    string[] result = highlighter.Highlight("body", termQuery, searcher, hits);

    reader.Dispose();
    directory.Dispose();
    return result;
}
/// <summary>
/// Simple test in which the matching term is the last word of the document; the whole
/// text is expected back with that final word wrapped in bold tags.
/// </summary>
[Test]
public void TestHighlightLastWord()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    bodyField.SetStringValue("This is a test");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits);
    assertEquals(1, highlighted.Length);
    assertEquals("This is a <b>test</b>", highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Simple test with documents that each consist of a single sentence; each snippet is
/// expected to be the full sentence with the matching term wrapped in bold tags.
/// </summary>
[Test]
public void TestOneSentence()
{
    Directory directory = NewDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    // The same Document/Field instances are reused; only the field value changes.
    bodyField.SetStringValue("This is a test.");
    writer.AddDocument(document);
    bodyField.SetStringValue("Test a one sentence document.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits);
    assertEquals(2, highlighted.Length);
    assertEquals("This is a <b>test</b>.", highlighted[0]);
    assertEquals("<b>Test</b> a one sentence document.", highlighted[1]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Simple test with a multi-valued field whose combined values are longer than the
/// highlighter's maximum length (40); the snippet length must equal that maximum plus
/// the length of one pair of bold tags.
/// </summary>
[Test]
public void TestMaxLengthWithMultivalue()
{
    Directory directory = NewDirectory();
    // use simpleanalyzer for more natural tokenization (else "test." is a token)
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };

    // Add the same value three times under the same field name.
    var document = new Document();
    for (int valueIndex = 0; valueIndex < 3; valueIndex++)
    {
        var bodyField = new Field("body", "", fieldType);
        bodyField.SetStringValue("This is a multivalued field");
        document.Add(bodyField);
    }
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter(40);
    Query query = new TermQuery(new Term("body", "field"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits);
    assertEquals(1, highlighted.Length);

    int expectedLength = 40 + "<b></b>".Length;
    assertTrue("Snippet should have maximum 40 characters plus the pre and post tags",
        highlighted[0].Length == expectedLength);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Highlights two fields ("body" and "title") in one call, using a boolean query with
/// one optional term clause per field, and checks the snippets returned for each field
/// and document.
/// </summary>
[Test]
public void TestMultipleFields()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field type (shared by both fields) with positions and offsets in the postings.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var titleField = new Field("title", "", fieldType);
    var document = new Document();
    document.Add(bodyField);
    document.Add(titleField);

    // The same Document/Field instances are reused; only the field values change.
    bodyField.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    titleField.SetStringValue("I am hoping for the best.");
    writer.AddDocument(document);
    bodyField.SetStringValue("Highlighting the first term. Hope it works.");
    titleField.SetStringValue("But best may not be good enough.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    var query = new BooleanQuery();
    query.Add(new TermQuery(new Term("body", "highlighting")), Occur.SHOULD);
    query.Add(new TermQuery(new Term("title", "best")), Occur.SHOULD);
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, hits.TotalHits);

    string[] fieldNames = { "body", "title" };
    IDictionary<string, string[]> byField = highlighter.HighlightFields(fieldNames, query, searcher, hits);
    assertEquals(2, byField.Count);

    string[] bodySnippets = byField["body"];
    assertEquals("Just a test <b>highlighting</b> from postings. ", bodySnippets[0]);
    assertEquals("<b>Highlighting</b> the first term. ", bodySnippets[1]);

    string[] titleSnippets = byField["title"];
    assertEquals("I am hoping for the <b>best</b>.", titleSnippets[0]);
    assertEquals("But <b>best</b> may not be good enough.", titleSnippets[1]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Highlights a boolean query with three optional terms on the same field and checks
/// that every matching term inside the returned sentence is wrapped in bold tags.
/// </summary>
[Test]
public void TestMultipleTerms()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    // The same Document/Field instances are reused; only the field value changes.
    bodyField.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    writer.AddDocument(document);
    bodyField.SetStringValue("Highlighting the first term. Hope it works.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    var query = new BooleanQuery();
    query.Add(new TermQuery(new Term("body", "highlighting")), Occur.SHOULD);
    query.Add(new TermQuery(new Term("body", "just")), Occur.SHOULD);
    query.Add(new TermQuery(new Term("body", "first")), Occur.SHOULD);
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits);
    assertEquals(2, highlighted.Length);
    assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", highlighted[0]);
    assertEquals("<b>Highlighting</b> the <b>first</b> term. ", highlighted[1]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Requests up to two passages per document and checks the combined snippets: adjacent
/// passages are emitted back to back, while for the second document the two selected
/// passages are joined by "... ".
/// </summary>
[Test]
public void TestMultiplePassages()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    // The same Document/Field instances are reused; only the field value changes.
    bodyField.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    writer.AddDocument(document);
    bodyField.SetStringValue("This test is another test. Not a good sentence. Test test test test.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, hits.TotalHits);

    const int maxPassages = 2;
    string[] highlighted = highlighter.Highlight("body", query, searcher, hits, maxPassages);
    assertEquals(2, highlighted.Length);
    assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", highlighted[0]);
    assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", highlighted[1]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Verifies that highlighting fails with an <see cref="ArgumentException"/> when the
/// requested field was not indexed with offsets. Two cases are exercised: "body", which
/// is indexed with positions but no offsets, and "title", which is a
/// <see cref="StringField"/>.
/// </summary>
[Test]
public void TestUserFailedToIndexOffsets()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    // Deliberately stop at positions: no offsets are written to the postings.
    FieldType positionsType = new FieldType(TextField.TYPE_STORED);
    positionsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    Field body = new Field("body", "", positionsType);
    Field title = new StringField("title", "", Field.Store.YES);
    Document doc = new Document();
    doc.Add(body);
    doc.Add(title);

    // The same Document/Field instances are reused; only the field values change.
    body.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    title.SetStringValue("test");
    iw.AddDocument(doc);
    body.SetStringValue("This test is another test. Not a good sentence. Test test test test.");
    title.SetStringValue("test");
    iw.AddDocument(doc);

    IndexReader ir = iw.GetReader();
    iw.Dispose();

    IndexSearcher searcher = NewSearcher(ir);
    PostingsHighlighter highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);

    // Case 1: "body" has positions but no offsets.
    try
    {
        highlighter.Highlight("body", query, searcher, topDocs, 2);
        fail("did not hit expected exception");
    }
    catch (ArgumentException)
    {
        // expected
    }

    // Case 2: "title" is a StringField.
    try
    {
        highlighter.Highlight("title", new TermQuery(new Term("title", "test")), searcher, topDocs, 2);
        fail("did not hit expected exception");
    }
    catch (ArgumentException)
    {
        // expected
    }

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes a long real-world paragraph and highlights the phrase query
/// "buddhist origins", checking that the snippet contains both phrase terms highlighted
/// next to each other.
/// </summary>
[Test]
public void TestBuddhism()
{
    string content = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
        "range of academic disciplines published over the last forty years. With a new introduction " +
        "by the editor, this collection is a unique and unrivalled research resource for both " +
        "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
        "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
        "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
        "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
        "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
        "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";

    Directory directory = NewDirectory();
    Analyzer mockAnalyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory, mockAnalyzer);

    // Stored text field whose postings also carry positions and offsets.
    var offsetsType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", content, offsetsType);
    var doc = new Document();
    doc.Add(bodyField);
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var phrase = new PhraseQuery();
    phrase.Add(new Term("body", "buddhist"));
    phrase.Add(new Term("body", "origins"));
    TopDocs hits = searcher.Search(phrase, 10);
    assertEquals(1, hits.TotalHits);

    var highlighter = new PostingsHighlighter();
    string[] highlighted = highlighter.Highlight("body", phrase, searcher, hits, 2);
    assertEquals(1, highlighted.Length);
    assertTrue(highlighted[0].Contains("<b>Buddhist</b> <b>origins</b>"));

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes text containing typographic punctuation (curly apostrophes, an em dash) and
/// highlights the phrase query "curious george", asserting that the snippet does not
/// contain the sequence "&lt;b&gt;Curious&lt;/b&gt;Curious".
/// </summary>
[Test]
public void TestCuriousGeorge()
{
    string content = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
        "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
        "popular primate and painted in the original watercolor and charcoal style. " +
        "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";

    Directory directory = NewDirectory();
    Analyzer mockAnalyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory, mockAnalyzer);

    // Stored text field whose postings also carry positions and offsets.
    var offsetsType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", content, offsetsType);
    var doc = new Document();
    doc.Add(bodyField);
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var phrase = new PhraseQuery();
    phrase.Add(new Term("body", "curious"));
    phrase.Add(new Term("body", "george"));
    TopDocs hits = searcher.Search(phrase, 10);
    assertEquals(1, hits.TotalHits);

    var highlighter = new PostingsHighlighter();
    string[] highlighted = highlighter.Highlight("body", phrase, searcher, hits, 2);
    assertEquals(1, highlighted.Length);
    // NOTE(review): presumably guards against the highlighted term being emitted twice - confirm.
    assertFalse(highlighted[0].Contains("<b>Curious</b>Curious"));

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Reads the first line of the "CambridgeMA.utf8" resource, indexes it, and highlights a
/// three-term boolean query with an effectively unlimited maximum length, checking that
/// "Square" and "Porter" both appear highlighted in the snippet.
/// </summary>
[Test]
public void TestCambridgeMA()
{
    string content;
    using (TextReader resourceReader = new StreamReader(this.GetType().getResourceAsStream("CambridgeMA.utf8"), Encoding.UTF8))
    {
        // Only the first line of the resource is used.
        content = resourceReader.ReadLine();
    }

    Directory directory = NewDirectory();
    Analyzer mockAnalyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory, mockAnalyzer);

    // Stored text field whose postings also carry positions and offsets.
    var offsetsType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", content, offsetsType);
    var doc = new Document();
    doc.Add(bodyField);
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var query = new BooleanQuery();
    query.Add(new TermQuery(new Term("body", "porter")), Occur.SHOULD);
    query.Add(new TermQuery(new Term("body", "square")), Occur.SHOULD);
    query.Add(new TermQuery(new Term("body", "massachusetts")), Occur.SHOULD);
    TopDocs hits = searcher.Search(query, 10);
    assertEquals(1, hits.TotalHits);

    var highlighter = new PostingsHighlighter(int.MaxValue - 1);
    string[] highlighted = highlighter.Highlight("body", query, searcher, hits, 2);
    assertEquals(1, highlighted.Length);

    string snippet = highlighted[0];
    assertTrue(snippet.Contains("<b>Square</b>"));
    assertTrue(snippet.Contains("<b>Porter</b>"));

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes one document with four sentences, three of which contain "test", and asks for
/// two passages. The expected snippet consists of the first sentence and the last one
/// (seven occurrences), joined by "... ".
/// </summary>
[Test]
public void TestPassageRanking()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    bodyField.SetStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, hits.TotalHits);

    const int maxPassages = 2;
    string[] highlighted = highlighter.Highlight("body", query, searcher, hits, maxPassages);
    assertEquals(1, highlighted.Length);
    assertEquals("This is a <b>test</b>. ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds a boolean query with an optional "terms" clause and a nested boolean query
/// whose only clause is MUST_NOT "both", then checks that the prohibited term "both" is
/// not highlighted in the snippet.
/// </summary>
[Test]
public void TestBooleanMustNot()
{
    Directory directory = NewDirectory();
    Analyzer mockAnalyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true);
    var writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory, mockAnalyzer);

    // Stored text field whose postings also carry positions and offsets.
    var offsetsType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "This sentence has both terms. This sentence has only terms.", offsetsType);
    var doc = new Document();
    doc.Add(bodyField);
    writer.AddDocument(doc);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    // Outer query: SHOULD(terms) SHOULD(nested), where nested = MUST_NOT(both).
    var outer = new BooleanQuery();
    outer.Add(new TermQuery(new Term("body", "terms")), Occur.SHOULD);
    var nested = new BooleanQuery();
    outer.Add(nested, Occur.SHOULD);
    nested.Add(new TermQuery(new Term("body", "both")), Occur.MUST_NOT);

    TopDocs hits = searcher.Search(outer, 10);
    assertEquals(1, hits.TotalHits);

    var highlighter = new PostingsHighlighter(int.MaxValue - 1);
    string[] highlighted = highlighter.Highlight("body", outer, searcher, hits, 2);
    assertEquals(1, highlighted.Length);
    assertFalse(highlighted[0].Contains("<b>both</b>"));

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Uses a highlighter whose break iterator is a <c>WholeBreakIterator</c> and checks
/// that the snippet is the complete field text with every occurrence of "test" wrapped
/// in bold tags.
/// </summary>
[Test]
public void TestHighlightAllText()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    bodyField.SetStringValue("This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    PostingsHighlighter highlighter = new WholeBreakIteratorPostingsHighlighter(10000);
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits, 2);
    assertEquals(1, highlighted.Length);
    assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// A <see cref="PostingsHighlighter"/> that supplies a new <c>WholeBreakIterator</c>
/// for every field instead of the base class's break iterator.
/// </summary>
internal class WholeBreakIteratorPostingsHighlighter : PostingsHighlighter
{
    /// <summary>Creates the highlighter using the base class's default settings.</summary>
    public WholeBreakIteratorPostingsHighlighter()
    {
    }

    /// <summary>Creates the highlighter with the given maximum length.</summary>
    /// <param name="maxLength">Value forwarded unchanged to the base constructor.</param>
    public WholeBreakIteratorPostingsHighlighter(int maxLength)
        : base(maxLength)
    {
    }

    /// <summary>Returns a fresh <c>WholeBreakIterator</c>, regardless of the field.</summary>
    /// <param name="field">Field name; not used.</param>
    protected override BreakIterator GetBreakIterator(string field)
    {
        BreakIterator wholeText = new WholeBreakIterator();
        return wholeText;
    }
}
/// <summary>
/// Exercises the <c>HighlightFields</c> overload that takes explicit document ids and a
/// per-field maximum passage count, feeding it the ids of the two search hits and
/// checking the resulting "body" snippets.
/// </summary>
[Test]
public void TestSpecificDocIDs()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var bodyField = new Field("body", "", fieldType);
    var document = new Document();
    document.Add(bodyField);

    // The same Document/Field instances are reused; only the field value changes.
    bodyField.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
    writer.AddDocument(document);
    bodyField.SetStringValue("Highlighting the first term. Hope it works.");
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.TotalHits);

    // Collect the ids of both hits, in result order.
    ScoreDoc[] scoreDocs = topDocs.ScoreDocs;
    int[] docIds = { scoreDocs[0].Doc, scoreDocs[1].Doc };

    var byField = highlighter.HighlightFields(new[] { "body" }, query, searcher, docIds, new[] { 1 });
    string[] highlighted = byField["body"];
    assertEquals(2, highlighted.Length);
    assertEquals("Just a test <b>highlighting</b> from postings. ", highlighted[0]);
    assertEquals("<b>Highlighting</b> the first term. ", highlighted[1]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Indexes the "body" field without storing it and highlights through a subclass that
/// overrides <c>LoadFieldValues</c> to return the text directly; the snippet must be
/// the full text with every "test" wrapped in bold tags.
/// </summary>
[Test]
public void TestCustomFieldValueSource()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    var document = new Document();

    // Not stored: the highlighter has to obtain the text from somewhere other than the index.
    var fieldType = new FieldType(TextField.TYPE_NOT_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    string content = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
    document.Add(new Field("body", content, fieldType));
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    PostingsHighlighter highlighter = new LoadFieldValuesPostingsHighlighter(10000, content);
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs hits = searcher.Search(query, null, 10, Sort.INDEXORDER);
    assertEquals(1, hits.TotalHits);

    string[] highlighted = highlighter.Highlight("body", query, searcher, hits, 2);
    assertEquals(1, highlighted.Length);
    assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// A <see cref="WholeBreakIteratorPostingsHighlighter"/> that returns a fixed text,
/// supplied at construction time, as the field value to highlight.
/// </summary>
internal class LoadFieldValuesPostingsHighlighter : WholeBreakIteratorPostingsHighlighter
{
    // Text handed back by LoadFieldValues.
    private readonly string fixedText;

    /// <summary>Creates the highlighter.</summary>
    /// <param name="maxLength">Value forwarded unchanged to the base constructor.</param>
    /// <param name="text">Text to return from <see cref="LoadFieldValues"/>.</param>
    public LoadFieldValuesPostingsHighlighter(int maxLength, string text)
        : base(maxLength)
    {
        fixedText = text;
    }

    /// <summary>
    /// Returns a 1x1 array holding the fixed text. When assertions are enabled, this
    /// asserts that exactly one field and one document id were requested.
    /// </summary>
    protected override IList<string[]> LoadFieldValues(IndexSearcher searcher, string[] fields, int[] docids, int maxLength)
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(fields.Length == 1);
        if (Debugging.AssertsEnabled) Debugging.Assert(docids.Length == 1);

        // One row containing one value.
        string[][] values = new string[1][];
        values[0] = new string[] { fixedText };
        return values;
    }
}
/// <summary>
/// Make sure the highlighter returns the first N sentences when there were no hits:
/// the query term does not occur in the document and two passages are requested, so
/// the first two sentences are expected back without highlight tags.
/// </summary>
[Test]
public void TestEmptyHighlights()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var document = new Document();
    document.Add(new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType));
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    var highlighter = new PostingsHighlighter();
    Query query = new TermQuery(new Term("body", "highlighting"));

    // Highlight document 0 directly rather than going through a search.
    int[] docIds = { 0 };
    var byField = highlighter.HighlightFields(new[] { "body" }, query, searcher, docIds, new[] { 2 });
    string[] highlighted = byField["body"];
    assertEquals(1, highlighted.Length);
    assertEquals("test this is. another sentence this test has. ", highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Make sure we can customize how an empty highlight is returned: with a highlighter
/// whose <c>GetEmptyHighlight</c> yields no passages, the snippet for a document without
/// hits is expected to be <c>null</c>.
/// </summary>
[Test]
public void TestCustomEmptyHighlights()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var document = new Document();
    document.Add(new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType));
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    PostingsHighlighter highlighter = new GetEmptyHighlightPostingsHighlighter();
    Query query = new TermQuery(new Term("body", "highlighting"));

    // Highlight document 0 directly rather than going through a search.
    int[] docIds = { 0 };
    var byField = highlighter.HighlightFields(new[] { "body" }, query, searcher, docIds, new[] { 2 });
    string[] highlighted = byField["body"];
    assertEquals(1, highlighted.Length);
    assertNull(highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// A <see cref="PostingsHighlighter"/> whose <see cref="GetEmptyHighlight"/> override
/// always returns an empty passage array.
/// </summary>
internal class GetEmptyHighlightPostingsHighlighter : PostingsHighlighter
{
    /// <summary>Returns a new, empty <see cref="Passage"/> array; all arguments are ignored.</summary>
    protected override Passage[] GetEmptyHighlight(string fieldName, BreakIterator bi, int maxPassages)
    {
        Passage[] noPassages = new Passage[] { };
        return noPassages;
    }
}
/// <summary>
/// Make sure the highlighter returns the whole text when there are no hits and the
/// highlighter's break iterator is a <c>WholeBreakIterator</c>.
/// </summary>
[Test]
public void TestEmptyHighlightsWhole()
{
    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(NewLogMergePolicy());
    var writer = new RandomIndexWriter(Random, directory, config);

    // Stored text field whose postings also carry positions and offsets.
    var fieldType = new FieldType(TextField.TYPE_STORED)
    {
        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    };
    var document = new Document();
    document.Add(new Field("body", "test this is. another sentence this test has. far away is that planet.", fieldType));
    writer.AddDocument(document);

    IndexReader reader = writer.GetReader();
    writer.Dispose();

    IndexSearcher searcher = NewSearcher(reader);
    PostingsHighlighter highlighter = new WholeBreakIteratorPostingsHighlighter(10000);
    Query query = new TermQuery(new Term("body", "highlighting"));

    // Highlight document 0 directly rather than going through a search.
    int[] docIds = { 0 };
    var byField = highlighter.HighlightFields(new[] { "body" }, query, searcher, docIds, new[] { 2 });
    string[] highlighted = byField["body"];
    assertEquals(1, highlighted.Length);
    assertEquals("test this is. another sentence this test has. far away is that planet.", highlighted[0]);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Make sure the highlighter is OK with an entirely missing field: the only
/// indexed field is "body", the field "bogus" is highlighted, and the
/// resulting snippet is expected to be <c>null</c>.
/// </summary>
[Test]
public void TestFieldIsMissing()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            Document doc = new Document();
            Field body = new Field("body", "test this is. another sentence this test has. far away is that planet.", offsetsType);
            doc.Add(body);
            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighter();

            // Both the query and the highlighted field name refer to a field that was never indexed.
            Query query = new TermQuery(new Term("bogus", "highlighting"));
            int[] docIDs = new int[] { 0 };
            String[] snippets = highlighter.HighlightFields(new String[] { "bogus" }, query, searcher, docIDs, new int[] { 2 })["bogus"];

            assertEquals(1, snippets.Length);
            assertNull(snippets[0]);
        }
    }
}
/// <summary>
/// Verifies that a "body" field consisting of a single space, highlighted
/// with a query that has no hits in it, comes back as that single space.
/// </summary>
[Test]
public void TestFieldIsJustSpace()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            // Document under test: body is one space, tagged with id "id" so it can be found again.
            Document doc = new Document();
            doc.Add(new Field("body", " ", offsetsType));
            doc.Add(new Field("id", "id", offsetsType));
            iw.AddDocument(doc);

            // Second document with a non-blank body.
            doc = new Document();
            doc.Add(new Field("body", "something", offsetsType));
            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighter();

            // Look the target document up by its id field rather than assuming a doc number.
            int docID = searcher.Search(new TermQuery(new Term("id", "id")), 1).ScoreDocs[0].Doc;

            Query query = new TermQuery(new Term("body", "highlighting"));
            int[] docIDs = new int[] { docID };
            String[] snippets = highlighter.HighlightFields(new String[] { "body" }, query, searcher, docIDs, new int[] { 2 })["body"];

            assertEquals(1, snippets.Length);
            assertEquals(" ", snippets[0]);
        }
    }
}
/// <summary>
/// Verifies that a "body" field holding the empty string, highlighted with
/// a query that has no hits in it, yields a <c>null</c> snippet.
/// </summary>
[Test]
public void TestFieldIsEmptyString()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            // Document under test: body is empty, tagged with id "id" so it can be found again.
            Document doc = new Document();
            doc.Add(new Field("body", "", offsetsType));
            doc.Add(new Field("id", "id", offsetsType));
            iw.AddDocument(doc);

            // Second document with a non-empty body.
            doc = new Document();
            doc.Add(new Field("body", "something", offsetsType));
            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighter();

            // Look the target document up by its id field rather than assuming a doc number.
            int docID = searcher.Search(new TermQuery(new Term("id", "id")), 1).ScoreDocs[0].Doc;

            Query query = new TermQuery(new Term("body", "highlighting"));
            int[] docIDs = new int[] { docID };
            String[] snippets = highlighter.HighlightFields(new String[] { "body" }, query, searcher, docIDs, new int[] { 2 })["body"];

            assertEquals(1, snippets.Length);
            assertNull(snippets[0]);
        }
    }
}
/// <summary>
/// Indexes <c>AtLeast(100)</c> documents of the form "the answer is N"
/// (even N get " some more terms" appended), searches for "answer", and
/// checks that every hit's snippet is the document text with "answer"
/// wrapped in <c>&lt;b&gt;</c> tags.
/// </summary>
[Test]
public void TestMultipleDocs()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMergePolicy(NewLogMergePolicy());

        int numDocs;
        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            numDocs = AtLeast(100);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                String content = "the answer is " + i;
                if ((i & 1) == 0)
                {
                    content += " some more terms";
                }
                doc.Add(new Field("body", content, offsetsType));
                doc.Add(NewStringField("id", "" + i, Field.Store.YES));
                iw.AddDocument(doc);

                // Commit at random points while indexing.
                if (Random.nextInt(10) == 2)
                {
                    iw.Commit();
                }
            }

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighter();
            Query query = new TermQuery(new Term("body", "answer"));

            // Every document contains "answer", so all of them must match.
            TopDocs hits = searcher.Search(query, numDocs);
            assertEquals(numDocs, hits.TotalHits);

            String[] snippets = highlighter.Highlight("body", query, searcher, hits);
            assertEquals(numDocs, snippets.Length);

            for (int hit = 0; hit < numDocs; hit++)
            {
                // Rebuild the expected text from the stored id, since hit order is not index order.
                Document doc = searcher.Doc(hits.ScoreDocs[hit].Doc);
                int id = int.Parse(doc.Get("id"), CultureInfo.InvariantCulture);
                String expected = "the <b>answer</b> is " + id;
                if ((id & 1) == 0)
                {
                    expected += " some more terms";
                }
                assertEquals(expected, snippets[hit]);
            }
        }
    }
}
/// <summary>
/// Highlights the "title" and "body" fields of one document with different
/// per-field passage limits (1 and 2 respectively) and checks that the
/// title snippet holds one sentence and the body snippet two.
/// </summary>
[Test]
public void TestMultipleSnippetSizes()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            Field body = new Field("body", "", offsetsType);
            Field title = new Field("title", "", offsetsType);
            Document doc = new Document();
            doc.Add(body);
            doc.Add(title);

            // Both fields carry the same three-sentence text.
            body.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
            title.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new PostingsHighlighter();

            BooleanQuery query = new BooleanQuery();
            query.Add(new TermQuery(new Term("body", "test")), Occur.SHOULD);
            query.Add(new TermQuery(new Term("title", "test")), Occur.SHOULD);

            // The last array is positional: 1 passage for "title", 2 for "body".
            IDictionary<String, String[]> snippets = highlighter.HighlightFields(new String[] { "title", "body" }, query, searcher, new int[] { 0 }, new int[] { 1, 2 });
            String titleHighlight = snippets["title"][0];
            String bodyHighlight = snippets["body"][0];

            assertEquals("This is a <b>test</b>. ", titleHighlight);
            assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
        }
    }
}
/// <summary>
/// Highlights text containing markup using
/// <see cref="GetFormatterPostingsHighlighter"/> and expects the passage
/// text to come back entity-encoded while the <c>&lt;b&gt;</c> highlight
/// tags themselves stay literal.
/// </summary>
[Test]
public void TestEncode()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            Field body = new Field("body", "", offsetsType);
            Document doc = new Document();
            doc.Add(body);

            // The <i>…</i> markup in the stored text is what must end up encoded.
            body.SetStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new GetFormatterPostingsHighlighter();
            Query query = new TermQuery(new Term("body", "highlighting"));

            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.TotalHits);

            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);
            assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
        }
    }
}
/// <summary>
/// Highlighter used by <c>TestEncode</c>: every field is formatted by a
/// <see cref="DefaultPassageFormatter"/> constructed with the arguments
/// <c>"&lt;b&gt;"</c>, <c>"&lt;/b&gt;"</c>, <c>"... "</c> and <c>true</c>.
/// </summary>
internal class GetFormatterPostingsHighlighter : PostingsHighlighter
{
    /// <summary>Creates a new formatter on each call; <paramref name="field"/> is not consulted.</summary>
    // NOTE(review): the trailing 'true' presumably switches on escaping
    // (TestEncode expects entity-encoded output) - confirm against DefaultPassageFormatter.
    protected override PassageFormatter GetFormatter(string field)
        => new DefaultPassageFormatter("<b>", "</b>", "... ", true);
}
/// <summary>
/// Customizing the gap separator to force a sentence break: a document with
/// two "body" values is highlighted with
/// <see cref="GetMultiValuedSeparatorPostingsHighlighter"/>, and the snippet
/// is expected to contain only the first value followed by U+2029.
/// </summary>
[Test]
public void TestGapSeparator()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        // use simpleanalyzer for more natural tokenization (else "test." is a token)
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.SIMPLE, true));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            Document doc = new Document();

            // Two values under the same field name make "body" multi-valued.
            Field body1 = new Field("body", "", offsetsType);
            body1.SetStringValue("This is a multivalued field");
            doc.Add(body1);

            Field body2 = new Field("body", "", offsetsType);
            body2.SetStringValue("This is something different");
            doc.Add(body2);

            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new GetMultiValuedSeparatorPostingsHighlighter();
            Query query = new TermQuery(new Term("body", "field"));

            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.TotalHits);

            String[] snippets = highlighter.Highlight("body", query, searcher, topDocs);
            assertEquals(1, snippets.Length);
            assertEquals("This is a multivalued <b>field</b>\u2029", snippets[0]);
        }
    }
}
/// <summary>
/// Highlighter used by <c>TestGapSeparator</c>: it reports U+2029
/// (PARAGRAPH SEPARATOR) as the separator between the values of a
/// multi-valued field.
/// </summary>
internal class GetMultiValuedSeparatorPostingsHighlighter : PostingsHighlighter
{
    /// <summary>
    /// Returns <c>'\u2029'</c>. When asserts are enabled, first checks that
    /// <paramref name="field"/> is exactly "body".
    /// </summary>
    protected override char GetMultiValuedSeparator(string field)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(field.Equals("body", StringComparison.Ordinal));
        }

        const char paragraphSeparator = '\u2029';
        return paragraphSeparator;
    }
}
/// <summary>
/// LUCENE-4906: checks that <c>HighlightFieldsAsObjects</c> hands back the
/// object produced by a custom formatter. Here
/// <see cref="ObjectFormatterPostingsHighlighter"/> produces a two-element
/// string array instead of a plain string.
/// </summary>
[Test]
public void TestObjectFormatter()
{
    // The using blocks release the writer, reader and directory even when an
    // assertion below throws; on success the disposal order is unchanged.
    using (Directory dir = NewDirectory())
    {
        IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
        iwc.SetMergePolicy(NewLogMergePolicy());

        IndexReader ir;
        using (RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc))
        {
            FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
            offsetsType.IndexOptions = (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

            Field body = new Field("body", "", offsetsType);
            Document doc = new Document();
            doc.Add(body);
            body.SetStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
            iw.AddDocument(doc);

            ir = iw.GetReader();
        }

        using (ir)
        {
            IndexSearcher searcher = NewSearcher(ir);
            PostingsHighlighter highlighter = new ObjectFormatterPostingsHighlighter();
            Query query = new TermQuery(new Term("body", "highlighting"));

            TopDocs topDocs = searcher.Search(query, null, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.TotalHits);

            int[] docIDs = new int[] { topDocs.ScoreDocs[0].Doc };
            IDictionary<String, Object[]> snippets = highlighter.HighlightFieldsAsObjects(new String[] { "body" }, query, searcher, docIDs, new int[] { 1 });
            Object[] bodySnippets = snippets["body"];
            assertEquals(1, bodySnippets.Length);

            // The formatter's result is stored as-is, so it can be cast back to String[].
            assertTrue(Arrays.Equals(new String[] { "blah blah", "Just a test <b>highlighting</b> from postings. " }, (String[])bodySnippets[0]));
        }
    }
}
/// <summary>
/// Highlighter used by <c>TestObjectFormatter</c>: every field is formatted
/// by a <see cref="PassageFormatterHelper"/>, which returns a string array
/// rather than a string.
/// </summary>
internal class ObjectFormatterPostingsHighlighter : PostingsHighlighter
{
    /// <summary>Creates a new <see cref="PassageFormatterHelper"/> on each call; <paramref name="field"/> is not consulted.</summary>
    protected override PassageFormatter GetFormatter(string field)
    {
        return new PassageFormatterHelper();
    }

    /// <summary>
    /// Formatter that wraps the output of a <see cref="DefaultPassageFormatter"/>
    /// in a length-2 string array.
    /// </summary>
    internal class PassageFormatterHelper : PassageFormatter
    {
        // Never reassigned after construction, hence private and readonly.
        private readonly PassageFormatter defaultFormatter = new DefaultPassageFormatter();

        /// <summary>
        /// Returns a <c>String[]</c> whose first element is the fixed text
        /// "blah blah" and whose second element is the default formatter's
        /// rendering of <paramref name="passages"/> over <paramref name="content"/>.
        /// </summary>
        public override object Format(Passage[] passages, string content)
        {
            // Just turns the String snippet into a length 2
            // array of String
            return new String[] { "blah blah", defaultFormatter.Format(passages, content).toString() };
        }
    }
}
}
}
#endif