| using Lucene.Net.Analysis; |
| using Lucene.Net.Analysis.TokenAttributes; |
| using Lucene.Net.Documents; |
| using NUnit.Framework; |
| using System; |
| using System.IO; |
| using System.Text; |
| using Assert = Lucene.Net.TestFramework.Assert; |
| using Console = Lucene.Net.Util.SystemConsole; |
| |
| namespace Lucene.Net.Search |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using AtomicReader = Lucene.Net.Index.AtomicReader; |
| using BytesRef = Lucene.Net.Util.BytesRef; |
| using Directory = Lucene.Net.Store.Directory; |
| using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum; |
| using Document = Documents.Document; |
| using Field = Field; |
| using IndexReader = Lucene.Net.Index.IndexReader; |
| using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; |
| using MultiFields = Lucene.Net.Index.MultiFields; |
| using MultiSpansWrapper = Lucene.Net.Search.Spans.MultiSpansWrapper; |
| using PayloadSpanUtil = Lucene.Net.Search.Payloads.PayloadSpanUtil; |
| using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter; |
| using SlowCompositeReaderWrapper = Lucene.Net.Index.SlowCompositeReaderWrapper; |
| using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery; |
| using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; |
| using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; |
| using Term = Lucene.Net.Index.Term; |
| using TextField = TextField; |
| |
    /// <summary>
    /// Term position unit test: verifies that custom position increments assigned
    /// during analysis are honored by phrase queries, span queries, and payloads.
    /// </summary>
    [TestFixture]
    public class TestPositionIncrement : LuceneTestCase
    {
        // LUCENENET: This existed in Lucene, but do we really want to override the global setting?
        //internal const bool VERBOSE = false;

        /// <summary>
        /// Indexes one document through <see cref="AnalyzerAnonymousClass"/>, whose tokenizer
        /// ignores the field text and emits the canned tokens "1".."5" with position increments
        /// { 1, 2, 1, 0, 1 } — i.e. the tokens land at positions 0, 2, 3, 3, 4 ("4" is stacked
        /// on top of "3") — then verifies that <see cref="PhraseQuery"/> and
        /// <see cref="MultiPhraseQuery"/> honor those positions.
        /// </summary>
        [Test]
        public virtual void TestSetPosition()
        {
            Analyzer analyzer = new AnalyzerAnonymousClass(this);
            Directory store = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, store, analyzer);
            Document d = new Document();
            // The field text is irrelevant: the analyzer above produces canned tokens.
            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.GetReader();
            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            // Check the raw positions recorded in the index for terms "1" and "2".
            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));
            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2 (its increment was 2)
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;
            ScoreDoc[] hits;

            // "1" (pos 0) and "2" (pos 2) are not adjacent, so the exact phrase must not match.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitly.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // "2" (pos 2) and "3" (pos 3) are adjacent, so the default phrase matches.
            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // "3" and "4" occupy the SAME position (increment 0), so a phrase
            // that expects "4" one position after "3" must not match.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for non existing searched term
            // even if there exist another searched terms in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succeed for non existing searched term
            // because there exist another searched terms in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();
            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // "2" (pos 2) followed by "4" (pos 3) IS a valid consecutive pair.
            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // "3" (pos 3) followed by "5" (pos 4) matches.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // "4" (also pos 3) followed by "5" (pos 4) matches too.
            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // "2" (pos 2) and "5" (pos 4) are two positions apart — no match.
            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }

        /// <summary>
        /// Analyzer whose token stream ignores the input text entirely and emits the
        /// canned tokens produced by <see cref="TokenizerAnonymousClass"/>.
        /// </summary>
        private class AnalyzerAnonymousClass : Analyzer
        {
            // Retained for parity with the original Java anonymous class; not otherwise read.
            private readonly TestPositionIncrement outerInstance;

            public AnalyzerAnonymousClass(TestPositionIncrement outerInstance)
            {
                this.outerInstance = outerInstance;
            }

            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                return new TokenStreamComponents(new TokenizerAnonymousClass(reader));
            }

            /// <summary>
            /// Tokenizer that emits the fixed tokens "1".."5" with position increments
            /// { 1, 2, 1, 0, 1 }, regardless of the reader contents. The increment of 0
            /// on "4" stacks it at the same position as "3".
            /// </summary>
            private class TokenizerAnonymousClass : Tokenizer
            {
                public TokenizerAnonymousClass(TextReader reader)
                    : base(reader)
                {
                    TOKENS = new string[] { "1", "2", "3", "4", "5" };
                    INCREMENTS = new int[] { 1, 2, 1, 0, 1 };
                    i = 0;
                    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
                    termAtt = AddAttribute<ICharTermAttribute>();
                    offsetAtt = AddAttribute<IOffsetAttribute>();
                }

                // TODO: use CannedTokenStream
                private readonly string[] TOKENS; // canned token texts, emitted in order

                private readonly int[] INCREMENTS; // position increment paired with each token
                private int i; // index of the next token to emit

                internal IPositionIncrementAttribute posIncrAtt;
                internal ICharTermAttribute termAtt;
                internal IOffsetAttribute offsetAtt;

                public override sealed bool IncrementToken()
                {
                    if (i == TOKENS.Length)
                    {
                        return false; // stream exhausted
                    }
                    ClearAttributes();
                    termAtt.Append(TOKENS[i]);
                    offsetAtt.SetOffset(i, i); // dummy zero-width offsets; offsets are not under test here
                    posIncrAtt.PositionIncrement = INCREMENTS[i];
                    i++;
                    return true;
                }

                public override void Reset()
                {
                    base.Reset();
                    this.i = 0; // rewind so the stream can be consumed again
                }
            }
        }

        /// <summary>
        /// Verifies that span matches and their payloads are reported correctly for
        /// matches that start at position 0, using <c>MockPayloadAnalyzer</c>
        /// (whose payloads carry strings such as "pos: 0" — see the final assertion).
        /// </summary>
        [Test]
        public virtual void TestPayloadsPos0()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, new MockPayloadAnalyzer());
            Document doc = new Document();
            doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            writer.AddDocument(doc);

            IndexReader readerFromWriter = writer.GetReader();
            AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

            DocsAndPositionsEnum tp = r.GetTermPositionsEnum(new Term("content", "a"));

            int count = 0;
            Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq);
            // NOTE(review): positions 0,1,3,6 do not equal the raw token indices
            // (0,1,6,12) — presumably MockPayloadAnalyzer assigns varying position
            // increments; confirm against its implementation.
            Assert.AreEqual(0, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

            IndexSearcher @is = NewSearcher(readerFromWriter);

            // "a" ... "k" within a slop of 30, in order.
            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
            SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false; // must observe at least one span starting at position 0
            if (Verbose)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            while (pspans.MoveNext())
            {
                if (Verbose)
                {
                    Console.WriteLine("doc " + pspans.Doc + ": span " + pspans.Start + " to " + pspans.End);
                }
                var payloads = pspans.GetPayload();
                sawZero |= pspans.Start == 0;
                foreach (var bytes in payloads)
                {
                    count++;
                    if (Verbose)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString(bytes));
                    }
                }
            }
            Assert.IsTrue(sawZero);
            // 5 payloads in total across the matching spans
            Assert.AreEqual(5, count);

            // System.out.println("\ngetSpans test");
            // Re-run the same query counting spans rather than payloads: 4 spans carry the 5 payloads.
            Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            count = 0;
            sawZero = false;
            while (spans.MoveNext())
            {
                count++;
                sawZero |= spans.Start == 0;
                // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
                // spans.End());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            // System.out.println("\nPayloadSpanUtil test");

            // Third path to the same payloads: PayloadSpanUtil over the top-level reader context.
            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
            var pls = psu.GetPayloadsForQuery(snq);
            count = pls.Count;
            foreach (var bytes in pls)
            {
                string s = Encoding.UTF8.GetString(bytes);
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0", StringComparison.Ordinal);
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Dispose();
            @is.IndexReader.Dispose();
            dir.Dispose();
        }
    }
| } |