blob: 202d95d5a0d0283f40794d47d47a1144050add5a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Util;
using NUnit.Framework;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using LowerCaseTokenizer = Lucene.Net.Analysis.LowerCaseTokenizer;
using StopFilter = Lucene.Net.Analysis.StopFilter;
using TokenFilter = Lucene.Net.Analysis.TokenFilter;
using TokenStream = Lucene.Net.Analysis.TokenStream;
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using Payload = Lucene.Net.Index.Payload;
using Term = Lucene.Net.Index.Term;
using TermPositions = Lucene.Net.Index.TermPositions;
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
using Directory = Lucene.Net.Store.Directory;
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
using BaseTokenStreamTestCase = Lucene.Net.Test.Analysis.BaseTokenStreamTestCase;
using PayloadSpanUtil = Lucene.Net.Search.Payloads.PayloadSpanUtil;
using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
namespace Lucene.Net.Search
{
/// <summary>
/// Term position unit test. Verifies that custom position increments emitted by an
/// analyzer are preserved in the index and respected by TermPositions, PhraseQuery,
/// MultiPhraseQuery, QueryParser phrase parsing, and span/payload queries.
/// </summary>
public class TestPositionIncrement : LuceneTestCase
{
/// <summary>
/// Test analyzer that ignores the input text entirely and always returns the fixed
/// token stream produced by AnonymousClassTokenStream.
/// </summary>
private class AnonymousClassAnalyzer:Analyzer
{
public AnonymousClassAnalyzer(TestPositionIncrement enclosingInstance)
{
InitBlock(enclosingInstance);
}
/// <summary>
/// Emits the tokens "1".."5" with position increments {0, 2, 1, 0, 1}. Per the
/// assertions in TestSetPosition this places them at index positions 0, 2, 3, 3, 4
/// ("3" and "4" share a position; there is a gap before "2").
/// </summary>
private class AnonymousClassTokenStream:TokenStream
{
public AnonymousClassTokenStream(AnonymousClassAnalyzer enclosingInstance)
{
InitBlock(enclosingInstance);
}
// Captures the enclosing analyzer and registers the attributes this stream populates.
private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
termAtt = AddAttribute<ITermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
}
private AnonymousClassAnalyzer enclosingInstance;
public AnonymousClassAnalyzer Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
// Fixed token texts and the position increment assigned to each one, by index.
private System.String[] TOKENS = new System.String[]{"1", "2", "3", "4", "5"};
private int[] INCREMENTS = new int[]{0, 2, 1, 0, 1};
// Index of the next token to emit.
private int i = 0;
internal IPositionIncrementAttribute posIncrAtt;
internal ITermAttribute termAtt;
internal IOffsetAttribute offsetAtt;
protected override void Dispose(bool disposing)
{
// do nothing
}
public override bool IncrementToken()
{
// Fixed token list exhausted.
if (i == TOKENS.Length)
return false;
ClearAttributes();
termAtt.SetTermBuffer(TOKENS[i]);
// Offsets are just the token index; character-accurate offsets are not needed here.
offsetAtt.SetOffset(i, i);
posIncrAtt.PositionIncrement = INCREMENTS[i];
i++;
return true;
}
}
private void InitBlock(TestPositionIncrement enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
}
private TestPositionIncrement enclosingInstance;
public TestPositionIncrement Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
return new AnonymousClassTokenStream(this);
}
}
/// <summary>
/// Indexes one document with the fixed-increment analyzer (tokens "1".."5" at
/// positions 0, 2, 3, 3, 4) and checks that phrase/multi-phrase queries honor the
/// indexed positions, including stop-word position handling via the query parser.
/// </summary>
[Test]
public virtual void TestSetPosition()
{
Analyzer analyzer = new AnonymousClassAnalyzer(this);
Directory store = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
Document d = new Document();
// The field text is irrelevant: the analyzer emits its fixed tokens regardless.
d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
writer.AddDocument(d);
writer.Optimize();
writer.Close();
// NOTE(review): searcher/store are never closed in this test; harmless for a
// RAM-based directory, but an explicit Close would be tidier.
IndexSearcher searcher = new IndexSearcher(store, true);
TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));
pos.Next();
// first token should be at position 0
Assert.AreEqual(0, pos.NextPosition());
pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
pos.Next();
// second token should be at position 2
Assert.AreEqual(2, pos.NextPosition());
PhraseQuery q;
ScoreDoc[] hits;
// "1 2" as a default (consecutive-position) phrase must miss: the indexed gap is 2.
q = new PhraseQuery();
q.Add(new Term("field", "1"));
q.Add(new Term("field", "2"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// same as previous, just specify positions explicitly.
q = new PhraseQuery();
q.Add(new Term("field", "1"), 0);
q.Add(new Term("field", "2"), 1);
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// specifying correct positions should find the phrase.
q = new PhraseQuery();
q.Add(new Term("field", "1"), 0);
q.Add(new Term("field", "2"), 2);
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// "2" (pos 2) and "3" (pos 3) are adjacent, so the default phrase matches.
q = new PhraseQuery();
q.Add(new Term("field", "2"));
q.Add(new Term("field", "3"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// "3" and "4" share position 3, so a consecutive-position phrase fails...
q = new PhraseQuery();
q.Add(new Term("field", "3"));
q.Add(new Term("field", "4"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// phrase query would find it when correct positions are specified.
q = new PhraseQuery();
q.Add(new Term("field", "3"), 0);
q.Add(new Term("field", "4"), 0);
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// phrase query should fail for non existing searched term
// even if there exist another searched terms in the same searched position.
q = new PhraseQuery();
q.Add(new Term("field", "3"), 0);
q.Add(new Term("field", "9"), 0);
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// multi-phrase query should succeed for non existing searched term
// because there exist another searched terms in the same searched position.
MultiPhraseQuery mq = new MultiPhraseQuery();
mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
hits = searcher.Search(mq, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// "2" (pos 2) -> "4" (pos 3): adjacent, matches.
q = new PhraseQuery();
q.Add(new Term("field", "2"));
q.Add(new Term("field", "4"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// "3" (pos 3) -> "5" (pos 4): adjacent, matches.
q = new PhraseQuery();
q.Add(new Term("field", "3"));
q.Add(new Term("field", "5"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// "4" (pos 3) -> "5" (pos 4): adjacent, matches.
q = new PhraseQuery();
q.Add(new Term("field", "4"));
q.Add(new Term("field", "5"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
// "2" (pos 2) -> "5" (pos 4): gap of 2, no match for an exact phrase.
q = new PhraseQuery();
q.Add(new Term("field", "2"));
q.Add(new Term("field", "5"));
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// should not find "1 2" because there is a gap of 1 in the index
QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));
q = (PhraseQuery) qp.Parse("\"1 2\"");
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// omitted stop word cannot help because stop filter swallows the increments.
q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// query parser alone won't help, because stop filter swallows the increments.
qp.EnablePositionIncrements = true;
q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// stop filter alone won't help, because query parser swallows the increments.
qp.EnablePositionIncrements = false;
q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(0, hits.Length);
// when both qp and stopFilter propagate increments, we should find the doc.
qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
qp.EnablePositionIncrements = true;
q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
hits = searcher.Search(q, null, 1000).ScoreDocs;
Assert.AreEqual(1, hits.Length);
}
/// <summary>
/// Whitespace analyzer followed by a StopFilter removing the word "stop"; the
/// constructor flag controls whether the filter preserves position increments
/// across removed stop words.
/// </summary>
private class StopWhitespaceAnalyzer:Analyzer
{
internal bool enablePositionIncrements;
internal WhitespaceAnalyzer a = new WhitespaceAnalyzer();
public StopWhitespaceAnalyzer(bool enablePositionIncrements)
{
this.enablePositionIncrements = enablePositionIncrements;
}
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
TokenStream ts = a.TokenStream(fieldName, reader);
return new StopFilter(enablePositionIncrements, ts, new CharArraySet(new List<string> {"stop"}, true));
}
}
/// <summary>
/// Indexes text through TestPayloadAnalyzer/PayloadFilter, which give every
/// even-numbered token a position increment of 0 (stacking token pairs on one
/// position) and stamp each token with a "pos: N" payload. Verifies term
/// positions, span matches, and payload retrieval — including spans starting
/// at position 0.
/// </summary>
[Test]
public virtual void TestPayloadsPos0()
{
Directory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true,
IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
System.IO.MemoryStream ms = new System.IO.MemoryStream();
System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
sw.Write("a a b c d e a f g h i j a b k k");
// flush to stream & reset its position so it can be read
sw.Flush();
ms.Position = 0;
doc.Add(new Field("content", new System.IO.StreamReader(ms)));
writer.AddDocument(doc);
IndexReader r = writer.GetReader();
TermPositions tp = r.TermPositions(new Term("content", "a"));
int count = 0;
Assert.IsTrue(tp.Next());
// "a" occurs 4 times
Assert.AreEqual(4, tp.Freq);
// with the alternating 0/1 increments the four "a"s land at positions 0, 1, 3, 6.
int expected = 0;
Assert.AreEqual(expected, tp.NextPosition());
Assert.AreEqual(1, tp.NextPosition());
Assert.AreEqual(3, tp.NextPosition());
Assert.AreEqual(6, tp.NextPosition());
// only one doc has "a"
Assert.IsFalse(tp.Next());
IndexSearcher is_Renamed = new IndexSearcher(r);
// unordered "a" near "k" with slop 30 — wide enough to span the whole document.
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = new SpanQuery[] {stq1, stq2};
SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
count = 0;
bool sawZero = false;
//System.out.println("\ngetPayloadSpans test");
Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.IndexReader);
while (pspans.Next())
{
//System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
// at least one matching span must start at position 0
sawZero |= pspans.Start() == 0;
for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
{
count++;
System.Object generatedAux2 = it.Current;
//System.out.println(new String((byte[]) it.next()));
}
}
// five payloads collected over all matching spans
Assert.AreEqual(5, count);
Assert.IsTrue(sawZero);
//System.out.println("\ngetSpans test");
Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.IndexReader);
count = 0;
sawZero = false;
while (spans.Next())
{
count++;
sawZero |= spans.Start() == 0;
//System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
}
// four matching spans, one of which starts at position 0
Assert.AreEqual(4, count);
Assert.IsTrue(sawZero);
//System.out.println("\nPayloadSpanUtil test");
sawZero = false;
PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.IndexReader);
System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
count = pls.Count;
for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext();)
{
System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current));
//System.out.println(s);
// one of the returned payloads must record position 0
sawZero |= s.Equals("pos: 0");
}
Assert.AreEqual(5, count);
Assert.IsTrue(sawZero);
writer.Close();
is_Renamed.IndexReader.Close();
dir.Close();
}
}
/// <summary>
/// Test analyzer: wraps a LowerCaseTokenizer in a PayloadFilter so every token
/// carries a position payload (see PayloadFilter for the payload format).
/// </summary>
class TestPayloadAnalyzer : Analyzer
{
    public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        // Tokenize/lower-case first, then let PayloadFilter stamp each token.
        return new PayloadFilter(new LowerCaseTokenizer(reader), fieldName);
    }
}
/// <summary>
/// TokenFilter that stamps every token with a "pos: N" payload recording the
/// position it is emitted at, and alternates position increments so that
/// even-numbered tokens (0-based) reuse the previous position (increment 0)
/// while odd-numbered tokens advance it (increment 1).
/// </summary>
class PayloadFilter : TokenFilter
{
    internal System.String fieldName;
    // Position assigned to the current token (advanced only on odd-numbered tokens).
    internal int pos = 0;
    // Running count of tokens seen so far.
    internal int i = 0;
    internal IPositionIncrementAttribute posIncrAttr;
    internal IPayloadAttribute payloadAttr;
    internal ITermAttribute termAttr;

    public PayloadFilter(TokenStream input, System.String fieldName) : base(input)
    {
        this.fieldName = fieldName;
        posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
        payloadAttr = input.AddAttribute<IPayloadAttribute>();
        termAttr = input.AddAttribute<ITermAttribute>();
    }

    public override bool IncrementToken()
    {
        // Propagate end-of-stream from the wrapped stream.
        if (!input.IncrementToken())
        {
            return false;
        }

        // Record the token's position in its payload before (possibly) advancing it.
        payloadAttr.Payload = new Payload(System.Text.Encoding.UTF8.GetBytes("pos: " + pos));

        // Even-numbered tokens stack on the previous position; odd ones move forward.
        int increment = (i % 2 == 0) ? 0 : 1;
        posIncrAttr.PositionIncrement = increment;
        pos += increment;
        i++;
        return true;
    }
}
}