// blob: 87c2fcaab1208a2e0f05d709a031903fd3fe929b [file] [log] [blame]
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search.Spans;
using Lucene.Net.Store;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
namespace Lucene.Net.Search.Highlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class HighlighterPhraseTest : LuceneTestCase
{
private static readonly String FIELD = "text";
[Test]
public void TestConcurrentPhrase()
{
    // The indexed field carries overlapping tokens ("jump"/"jumped" share a
    // position — see TokenStreamConcurrent). The fragment produced from the
    // token stream rebuilt out of the stored term vector must match the
    // fragment produced from the original hand-built stream.
    const string text = "the fox jumped";
    Directory directory = NewDirectory();
    using (IndexWriter writer = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
    {
        // Store term vectors with both positions and offsets so the stream
        // can be reconstructed later.
        FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED)
        {
            StoreTermVectorOffsets = true,
            StoreTermVectorPositions = true,
            StoreTermVectors = true
        };
        Document doc = new Document();
        doc.Add(new Field(FIELD, new TokenStreamConcurrent(), fieldType));
        writer.AddDocument(doc);
    }
    IndexReader reader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, reader.NumDocs);
        IndexSearcher searcher = NewSearcher(reader);
        PhraseQuery query = new PhraseQuery { Slop = 0 };
        query.Add(new Term(FIELD, "fox"));
        query.Add(new Term(FIELD, "jumped"));
        TopDocs hits = searcher.Search(query, 1);
        assertEquals(1, hits.TotalHits);
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        // Rebuild a token stream from the stored term vector of doc 0.
        TokenStream fromTermVector = TokenSources.GetTokenStream(reader.GetTermVector(0, FIELD), false);
        // Both sources must yield the identical best fragment.
        assertEquals(
            highlighter.GetBestFragment(new TokenStreamConcurrent(), text),
            highlighter.GetBestFragment(fromTermVector, text));
    }
    finally
    {
        reader.Dispose();
        directory.Dispose();
    }
}
[Test]
public void TestConcurrentSpan()
{
    // Same overlapping-token fixture as TestConcurrentPhrase, but matched via
    // a SpanNearQuery and a custom collector instead of a PhraseQuery.
    const string text = "the fox jumped";
    Directory directory = NewDirectory();
    using (IndexWriter writer = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
    {
        FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED)
        {
            StoreTermVectorOffsets = true,
            StoreTermVectorPositions = true,
            StoreTermVectors = true
        };
        Document doc = new Document();
        doc.Add(new Field(FIELD, new TokenStreamConcurrent(), fieldType));
        writer.AddDocument(doc);
    }
    IndexReader reader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, reader.NumDocs);
        IndexSearcher searcher = NewSearcher(reader);
        Query query = new SpanNearQuery(
            new SpanQuery[]
            {
                new SpanTermQuery(new Term(FIELD, "fox")),
                new SpanTermQuery(new Term(FIELD, "jumped"))
            }, 0, true);
        // Collect the matching doc ids into a bit set.
        FixedBitSet matches = new FixedBitSet(reader.MaxDoc);
        searcher.Search(query, new ConcurrentSpanCollectorAnonymousClass(this, matches));
        assertEquals(1, matches.Cardinality());
        int maxDoc = reader.MaxDoc;
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        // NOTE(review): the upper bound `maxDoc - 1` mirrors the upstream
        // Lucene (Java) test and keeps NextSetBit(position + 1) in range, but
        // with a single-document index (maxDoc == 1) the loop body never
        // executes, so the per-hit assertions rely on the Cardinality check
        // above — confirm this is intentional upstream behavior.
        for (int position = matches.NextSetBit(0);
             position >= 0 && position < maxDoc - 1;
             position = matches.NextSetBit(position + 1))
        {
            assertEquals(0, position);
            TokenStream fromTermVector = TokenSources.GetTokenStream(reader.GetTermVector(position, FIELD), false);
            assertEquals(
                highlighter.GetBestFragment(new TokenStreamConcurrent(), text),
                highlighter.GetBestFragment(fromTermVector, text));
        }
    }
    finally
    {
        reader.Dispose();
        directory.Dispose();
    }
}
/// <summary>
/// Collector that records each matching document id in a <c>FixedBitSet</c>.
/// Stands in for the anonymous collector of the corresponding Java test.
/// </summary>
internal class ConcurrentSpanCollectorAnonymousClass : ICollector
{
    private readonly HighlighterPhraseTest outerInstance; // kept for parity with the Java anonymous class
    private readonly FixedBitSet bitset;
    private int docBase; // doc id offset of the current reader segment

    public ConcurrentSpanCollectorAnonymousClass(HighlighterPhraseTest outerInstance, FixedBitSet bitset)
    {
        this.outerInstance = outerInstance;
        this.bitset = bitset;
    }

    // Only bits are set, so collection order does not matter.
    public virtual bool AcceptsDocsOutOfOrder => true;

    public virtual void Collect(int i) => bitset.Set(docBase + i);

    public virtual void SetNextReader(AtomicReaderContext context) => docBase = context.DocBase;

    public virtual void SetScorer(Scorer scorer)
    {
        // Scores are not used by this collector.
    }
}
[Test]
public void TestSparsePhrase()
{
    // The indexed stream (TokenStreamSparse) drops "not", leaving a position
    // gap before "jump"; a slop-0 phrase "did jump" therefore must not match,
    // and highlighting from the term vector must agree with highlighting from
    // the original stream.
    const string text = "the fox did not jump";
    Directory directory = NewDirectory();
    using (IndexWriter writer = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
    {
        FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED)
        {
            StoreTermVectorOffsets = true,
            StoreTermVectorPositions = true,
            StoreTermVectors = true
        };
        Document doc = new Document();
        doc.Add(new Field(FIELD, new TokenStreamSparse(), fieldType));
        writer.AddDocument(doc);
    }
    IndexReader reader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, reader.NumDocs);
        IndexSearcher searcher = NewSearcher(reader);
        PhraseQuery query = new PhraseQuery { Slop = 0 };
        query.Add(new Term(FIELD, "did"));
        query.Add(new Term(FIELD, "jump"));
        TopDocs hits = searcher.Search(query, 1);
        assertEquals(0, hits.TotalHits);
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        TokenStream fromTermVector = TokenSources.GetTokenStream(reader.GetTermVector(0, FIELD), false);
        assertEquals(
            highlighter.GetBestFragment(new TokenStreamSparse(), text),
            highlighter.GetBestFragment(fromTermVector, text));
    }
    finally
    {
        reader.Dispose();
        directory.Dispose();
    }
}
[Test]
public void TestSparsePhraseWithNoPositions()
{
    // Term vectors store offsets but NOT positions here; the token stream is
    // rebuilt from offsets alone (second GetTokenStream argument is true —
    // presumably "positions guaranteed contiguous", confirm against the
    // TokenSources API) and both phrase terms still get highlighted.
    const string text = "the fox did not jump";
    Directory directory = NewDirectory();
    using (IndexWriter writer = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
    {
        FieldType fieldType = new FieldType(TextField.TYPE_STORED)
        {
            StoreTermVectorOffsets = true,
            StoreTermVectors = true
        };
        Document doc = new Document();
        doc.Add(new Field(FIELD, text, fieldType));
        writer.AddDocument(doc);
    }
    IndexReader reader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, reader.NumDocs);
        IndexSearcher searcher = NewSearcher(reader);
        PhraseQuery query = new PhraseQuery { Slop = 1 };
        query.Add(new Term(FIELD, "did"));
        query.Add(new Term(FIELD, "jump"));
        TopDocs hits = searcher.Search(query, 1);
        assertEquals(1, hits.TotalHits);
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        TokenStream fromTermVector = TokenSources.GetTokenStream(reader.GetTermVector(0, FIELD), true);
        assertEquals("the fox <B>did</B> not <B>jump</B>",
            highlighter.GetBestFragment(fromTermVector, text));
    }
    finally
    {
        reader.Dispose();
        directory.Dispose();
    }
}
[Test]
public void TestSparseSpan()
{
    // Span-query variant of TestSparsePhrase: an exact (slop 0, in-order)
    // SpanNearQuery for "did jump" must not match across the position gap,
    // and both highlighting paths must still produce identical fragments.
    const string text = "the fox did not jump";
    Directory directory = NewDirectory();
    using (IndexWriter writer = new IndexWriter(directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false))))
    {
        FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED)
        {
            StoreTermVectorOffsets = true,
            StoreTermVectorPositions = true,
            StoreTermVectors = true
        };
        Document doc = new Document();
        doc.Add(new Field(FIELD, new TokenStreamSparse(), fieldType));
        writer.AddDocument(doc);
    }
    IndexReader reader = DirectoryReader.Open(directory);
    try
    {
        assertEquals(1, reader.NumDocs);
        IndexSearcher searcher = NewSearcher(reader);
        Query query = new SpanNearQuery(
            new SpanQuery[]
            {
                new SpanTermQuery(new Term(FIELD, "did")),
                new SpanTermQuery(new Term(FIELD, "jump"))
            }, 0, true);
        TopDocs hits = searcher.Search(query, 1);
        assertEquals(0, hits.TotalHits);
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(query));
        TokenStream fromTermVector = TokenSources.GetTokenStream(reader.GetTermVector(0, FIELD), false);
        assertEquals(
            highlighter.GetBestFragment(new TokenStreamSparse(), text),
            highlighter.GetBestFragment(fromTermVector, text));
    }
    finally
    {
        reader.Dispose();
        directory.Dispose();
    }
}
/// <summary>
/// Hand-built token stream for "the fox did not jump" where "not" is
/// omitted: the final token "jump" carries a position increment of 2, leaving
/// a position gap between "did" and "jump".
/// </summary>
private sealed class TokenStreamSparse : TokenStream
{
    private Token[] tokens;
    private int index = -1; // position of the last token emitted
    private readonly ICharTermAttribute termAtt;
    private readonly IOffsetAttribute offsetAtt;
    private readonly IPositionIncrementAttribute posIncAtt;

    public TokenStreamSparse()
    {
        termAtt = AddAttribute<ICharTermAttribute>();
        offsetAtt = AddAttribute<IOffsetAttribute>();
        posIncAtt = AddAttribute<IPositionIncrementAttribute>();
        Reset();
    }

    public override bool IncrementToken()
    {
        if (++index >= tokens.Length)
        {
            return false; // stream exhausted
        }
        Token token = tokens[index];
        ClearAttributes();
        // Copy term text, offsets and position increment from the fixture token.
        termAtt.SetEmpty().Append(token);
        offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
        posIncAtt.PositionIncrement = token.PositionIncrement;
        return true;
    }

    public override void Reset()
    {
        index = -1;
        tokens = new Token[]
        {
            new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
            new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
            new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11),
            new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20)
        };
        // "not" (offsets 12-15) is skipped, so "jump" sits two positions
        // after "did".
        tokens[3].PositionIncrement = 2;
    }
}
/// <summary>
/// Hand-built token stream for "the fox jumped" that emits two tokens at the
/// same position and offsets (8-14): "jump" followed by "jumped" with a
/// position increment of 0, i.e. stacked/overlapping tokens.
/// </summary>
private sealed class TokenStreamConcurrent : TokenStream
{
    private Token[] tokens;
    private int index = -1; // position of the last token emitted
    private readonly ICharTermAttribute termAtt;
    private readonly IOffsetAttribute offsetAtt;
    private readonly IPositionIncrementAttribute posIncAtt;

    public TokenStreamConcurrent()
    {
        termAtt = AddAttribute<ICharTermAttribute>();
        offsetAtt = AddAttribute<IOffsetAttribute>();
        posIncAtt = AddAttribute<IPositionIncrementAttribute>();
        Reset();
    }

    public override bool IncrementToken()
    {
        if (++index >= tokens.Length)
        {
            return false; // stream exhausted
        }
        Token token = tokens[index];
        ClearAttributes();
        // Copy term text, offsets and position increment from the fixture token.
        termAtt.SetEmpty().Append(token);
        offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
        posIncAtt.PositionIncrement = token.PositionIncrement;
        return true;
    }

    public override void Reset()
    {
        index = -1;
        tokens = new Token[]
        {
            new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
            new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
            new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 8, 14),
            new Token(new char[] { 'j', 'u', 'm', 'p', 'e', 'd' }, 0, 6, 8, 14)
        };
        // "jumped" overlaps "jump" at the same position.
        tokens[3].PositionIncrement = 0;
    }
}
}
}