// blob: 155c53aaaf2066f5b643781ceea91170caa05efe [file] [log] [blame]
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using NUnit.Framework;
using System;
using System.IO;
using TermInfo = Lucene.Net.Search.VectorHighlight.FieldTermStack.TermInfo;
namespace Lucene.Net.Search.VectorHighlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class IndexTimeSynonymTest : AbstractTestCase
{
/// <summary>
/// One-word synonym index, single-term query for "Mac": the term stack must hold
/// exactly one entry, reported as "Mac(11,20,3)" (the offsets 11-20 are the ones
/// assigned to the "Mac" token in <c>makeIndex1w</c>).
/// </summary>
[Test]
public void TestFieldTermStackIndex1wSearch1term()
{
    makeIndex1w();

    var fieldQuery = new FieldQuery(tq("Mac"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(1, termStack.termList.size());
    assertEquals("Mac(11,20,3)", termStack.Pop().toString());
}
/// <summary>
/// One-word synonym index, query "Mac" OR "MacBook". Both synonyms were indexed with a
/// position increment of 0, and the test expects a single stack entry whose
/// <c>Next</c> links lead from "Mac" to "MacBook" and back to the first entry.
/// </summary>
[Test]
public void TestFieldTermStackIndex1wSearch2terms()
{
    makeIndex1w();

    var eitherSynonym = new BooleanQuery();
    eitherSynonym.Add(tq("Mac"), Occur.SHOULD);
    eitherSynonym.Add(tq("MacBook"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(eitherSynonym, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    assertEquals(1, termStack.termList.size());

    var first = termStack.Pop();
    assertEquals("Mac(11,20,3)", first.toString());
    assertEquals("MacBook(11,20,3)", first.Next.toString());
    // Following Next twice must return to the starting entry.
    assertSame(first, first.Next.Next);
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; querying the original term
/// "pc" must yield a single stack entry, "pc(3,5,1)".
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1term()
{
    makeIndex1w2w();

    var fieldQuery = new FieldQuery(tq("pc"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(1, termStack.termList.size());
    assertEquals("pc(3,5,1)", termStack.Pop().toString());
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; querying the phrase
/// "personal computer" must yield two stack entries that both carry the offsets
/// of "pc" (3-5), popped in the order "personal", then "computer".
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1phrase()
{
    makeIndex1w2w();

    var fieldQuery = new FieldQuery(pqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(2, termStack.termList.size());
    // Pop order matters: the two assertions consume the stack front to back.
    assertEquals("personal(3,5,1)", termStack.Pop().toString());
    assertEquals("computer(3,5,2)", termStack.Pop().toString());
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; querying only the second
/// synonym word "computer" must yield a single stack entry, "computer(3,5,2)".
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1partial()
{
    makeIndex1w2w();

    var fieldQuery = new FieldQuery(tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(1, termStack.termList.size());
    assertEquals("computer(3,5,2)", termStack.Pop().toString());
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; query is
/// "pc" OR the phrase "personal computer". The test expects two stack entries:
/// the first links "pc" and "personal" through <c>Next</c> (cycling back to itself),
/// the second is "computer".
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1term1phrase()
{
    makeIndex1w2w();

    var termOrPhrase = new BooleanQuery();
    termOrPhrase.Add(tq("pc"), Occur.SHOULD);
    termOrPhrase.Add(pqF("personal", "computer"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(termOrPhrase, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    assertEquals(2, termStack.termList.size());

    var first = termStack.Pop();
    assertEquals("pc(3,5,1)", first.toString());
    assertEquals("personal(3,5,1)", first.Next.toString());
    // Following Next twice must return to the starting entry.
    assertSame(first, first.Next.Next);

    assertEquals("computer(3,5,2)", termStack.Pop().toString());
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; querying "pc"
/// must yield a single stack entry spanning the whole two-word source, "pc(3,20,1)".
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1term()
{
    makeIndex2w1w();

    var fieldQuery = new FieldQuery(tq("pc"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(1, termStack.termList.size());
    assertEquals("pc(3,20,1)", termStack.Pop().toString());
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; querying the
/// phrase "personal computer" must yield two stack entries, both with offsets 3-20,
/// popped in the order "personal", then "computer".
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1phrase()
{
    makeIndex2w1w();

    var fieldQuery = new FieldQuery(pqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(2, termStack.termList.size());
    // Pop order matters: the two assertions consume the stack front to back.
    assertEquals("personal(3,20,1)", termStack.Pop().toString());
    assertEquals("computer(3,20,2)", termStack.Pop().toString());
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; querying only
/// "computer" must yield a single stack entry, "computer(3,20,2)".
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1partial()
{
    makeIndex2w1w();

    var fieldQuery = new FieldQuery(tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    assertEquals(1, termStack.termList.size());
    assertEquals("computer(3,20,2)", termStack.Pop().toString());
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; query is
/// "pc" OR the phrase "personal computer". The test expects two stack entries:
/// the first links "pc" and "personal" through <c>Next</c> (cycling back to itself),
/// the second is "computer".
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1term1phrase()
{
    makeIndex2w1w();

    var termOrPhrase = new BooleanQuery();
    termOrPhrase.Add(tq("pc"), Occur.SHOULD);
    termOrPhrase.Add(pqF("personal", "computer"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(termOrPhrase, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    assertEquals(2, termStack.termList.size());

    var first = termStack.Pop();
    assertEquals("pc(3,20,1)", first.toString());
    assertEquals("personal(3,20,1)", first.Next.toString());
    // Following Next twice must return to the starting entry.
    assertSame(first, first.Next.Next);

    assertEquals("computer(3,20,2)", termStack.Pop().toString());
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; the phrase query
/// "personal computer" must produce exactly one phrase entry covering offsets 3-5.
/// </summary>
[Test]
public void TestFieldPhraseListIndex1w2wSearch1phrase()
{
    makeIndex1w2w();

    var fieldQuery = new FieldQuery(pqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    assertEquals("personalcomputer(1.0)((3,5))", only.toString());
    assertEquals(3, only.StartOffset);
    assertEquals(5, only.EndOffset);
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; a term query for "computer"
/// alone must produce exactly one phrase entry covering offsets 3-5.
/// </summary>
[Test]
public void TestFieldPhraseListIndex1w2wSearch1partial()
{
    makeIndex1w2w();

    var fieldQuery = new FieldQuery(tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    assertEquals("computer(1.0)((3,5))", only.toString());
    assertEquals(3, only.StartOffset);
    assertEquals(5, only.EndOffset);
}
/// <summary>
/// "pc" expanded to "personal computer" at index time; query is
/// "pc" OR the phrase "personal computer". Exactly one phrase entry covering
/// offsets 3-5 is expected. Only the "(1.0)((3,5))" fragment of its string form is
/// checked; the leading text is deliberately not asserted.
/// </summary>
[Test]
public void TestFieldPhraseListIndex1w2wSearch1term1phrase()
{
    makeIndex1w2w();

    var termOrPhrase = new BooleanQuery();
    termOrPhrase.Add(tq("pc"), Occur.SHOULD);
    termOrPhrase.Add(pqF("personal", "computer"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(termOrPhrase, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    // The fragment must appear after at least one leading character (index > 0).
    assertTrue(only.toString().IndexOf("(1.0)((3,5))", StringComparison.Ordinal) > 0);
    assertEquals(3, only.StartOffset);
    assertEquals(5, only.EndOffset);
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; a term query
/// for "pc" must produce exactly one phrase entry covering offsets 3-20.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1term()
{
    makeIndex2w1w();

    var fieldQuery = new FieldQuery(tq("pc"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    assertEquals("pc(1.0)((3,20))", only.toString());
    assertEquals(3, only.StartOffset);
    assertEquals(20, only.EndOffset);
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; the phrase query
/// "personal computer" must produce exactly one phrase entry covering offsets 3-20.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1phrase()
{
    makeIndex2w1w();

    var fieldQuery = new FieldQuery(pqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    assertEquals("personalcomputer(1.0)((3,20))", only.toString());
    assertEquals(3, only.StartOffset);
    assertEquals(20, only.EndOffset);
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; a term query
/// for "computer" alone must produce exactly one phrase entry covering offsets 3-20.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1partial()
{
    makeIndex2w1w();

    var fieldQuery = new FieldQuery(tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    assertEquals("computer(1.0)((3,20))", only.toString());
    assertEquals(3, only.StartOffset);
    assertEquals(20, only.EndOffset);
}
/// <summary>
/// "personal computer" collapsed to the synonym "pc" at index time; query is
/// "pc" OR the phrase "personal computer". Exactly one phrase entry covering
/// offsets 3-20 is expected. Only the "(1.0)((3,20))" fragment of its string form is
/// checked; the leading text is deliberately not asserted.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1term1phrase()
{
    makeIndex2w1w();

    var termOrPhrase = new BooleanQuery();
    termOrPhrase.Add(tq("pc"), Occur.SHOULD);
    termOrPhrase.Add(pqF("personal", "computer"), Occur.SHOULD);

    var fieldQuery = new FieldQuery(termOrPhrase, true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    assertEquals(1, phrases.PhraseList.size());

    var only = phrases.PhraseList[0];
    // The fragment must appear after at least one leading character (index > 0).
    assertTrue(only.toString().IndexOf("(1.0)((3,20))", StringComparison.Ordinal) > 0);
    assertEquals(3, only.StartOffset);
    assertEquals(20, only.EndOffset);
}
/// <summary>
/// Builds the "one word -> one word" synonym fixture: "Macintosh" is indexed together
/// with the synonyms "Mac" and "MacBook", which share its offsets and are added with
/// a position increment of 0.
/// </summary>
private void makeIndex1w()
{
    // offsets             11111111112
    //           012345678901234567890
    // text      I'll buy a Macintosh
    // synonyms             Mac
    //                      MacBook
    // positions 0    1   2 3
    var tokens = new[]
    {
        t("I'll", 0, 4),
        t("buy", 5, 8),
        t("a", 9, 10),
        t("Macintosh", 11, 20),
        t("Mac", 11, 20, 0),
        t("MacBook", 11, 20, 0),
    };

    makeSynonymIndex("I'll buy a Macintosh", tokens);
}
/// <summary>
/// Builds the "one word -> two words" synonym fixture: "pc" is indexed together with
/// "personal" (position increment 0) and "computer" (default increment), both of
/// which reuse the offsets of "pc".
/// </summary>
private void makeIndex1w2w()
{
    // offsets             1111111
    //           01234567890123456
    // text      My pc was broken
    // synonyms     personal computer
    //
    // Token positions that follow from the increments below:
    //   My=0, pc/personal=1, computer=2, was=3, broken=4
    var tokens = new[]
    {
        t("My", 0, 2),
        t("pc", 3, 5),
        t("personal", 3, 5, 0),
        t("computer", 3, 5),
        t("was", 6, 9),
        t("broken", 10, 16),
    };

    makeSynonymIndex("My pc was broken", tokens);
}
/// <summary>
/// Builds the "two words -> one word" synonym fixture: "personal" and "computer" are
/// both indexed with the offsets of the whole two-word span (3-20), and the synonym
/// "pc" is added with the same span and a position increment of 0.
/// </summary>
private void makeIndex2w1w()
{
    // offsets             1111111111222222222233
    //           01234567890123456789012345678901
    // text      My personal computer was broken
    // synonym      pc
    // positions 0  1        2        3   4
    var tokens = new[]
    {
        t("My", 0, 2),
        t("personal", 3, 20),
        t("pc", 3, 20, 0),
        t("computer", 3, 20),
        t("was", 21, 24),
        t("broken", 25, 31),
    };

    makeSynonymIndex("My personal computer was broken", tokens);
}
/// <summary>
/// Indexes <paramref name="value"/> through <c>make1dmfIndex</c> using an analyzer
/// that emits exactly the supplied <paramref name="tokens"/>.
/// </summary>
/// <param name="value">Field text handed to <c>make1dmfIndex</c>.</param>
/// <param name="tokens">Pre-built tokens the analyzer will replay.</param>
void makeSynonymIndex(String value, params Token[] tokens)
{
    make1dmfIndex(new TokenArrayAnalyzer(tokens), value);
}
/// <summary>
/// Creates a token with the given text and offsets and a position increment of 1.
/// </summary>
/// <param name="text">Term text.</param>
/// <param name="startOffset">Start offset passed to the token.</param>
/// <param name="endOffset">End offset passed to the token.</param>
/// <returns>The newly created token.</returns>
public static Token t(String text, int startOffset, int endOffset)
{
    const int defaultIncrement = 1;
    return t(text, startOffset, endOffset, defaultIncrement);
}
/// <summary>
/// Creates a token with the given text, offsets and position increment.
/// </summary>
/// <param name="text">Term text.</param>
/// <param name="startOffset">Start offset passed to the token.</param>
/// <param name="endOffset">End offset passed to the token.</param>
/// <param name="positionIncrement">
/// Value assigned to <c>PositionIncrement</c>; the fixtures in this class use 0 for
/// synonyms that should share the previous token's position.
/// </param>
/// <returns>The newly created token.</returns>
public static Token t(String text, int startOffset, int endOffset, int positionIncrement)
{
    return new Token(text, startOffset, endOffset)
    {
        PositionIncrement = positionIncrement
    };
}
/// <summary>
/// Tokenizer that ignores its input text and replays a fixed array of tokens,
/// one per <see cref="IncrementToken"/> call.
/// </summary>
internal sealed class TokenizerAnonymousClass : Tokenizer
{
    private readonly Token[] tokens;
    private ICharTermAttribute termAttribute;
    private int cursor = 0;

    /// <summary>
    /// Registers the term attribute on the stream and remembers the tokens to replay.
    /// </summary>
    public TokenizerAnonymousClass(AttributeFactory factory, TextReader reader, Token[] tokens)
        : base(factory, reader)
    {
        this.tokens = tokens;
        this.termAttribute = AddAttribute<ICharTermAttribute>();
    }

    /// <summary>
    /// Copies the next stored token into the stream's attribute.
    /// </summary>
    /// <returns><c>false</c> once every stored token has been emitted; otherwise <c>true</c>.</returns>
    public override bool IncrementToken()
    {
        if (cursor < tokens.Length)
        {
            ClearAttributes();

            // Advance before copying so the cursor moves even if CopyTo throws,
            // matching a post-increment used directly in the indexer.
            Token current = tokens[cursor];
            cursor++;
            current.CopyTo(termAttribute);
            return true;
        }

        return false;
    }

    /// <summary>
    /// Resets the base tokenizer and rewinds to the first stored token.
    /// </summary>
    public override void Reset()
    {
        base.Reset();
        cursor = 0;
    }
}
/// <summary>
/// Analyzer whose token stream is a <c>TokenizerAnonymousClass</c> replaying the
/// tokens supplied at construction time.
/// </summary>
public sealed class TokenArrayAnalyzer : Analyzer
{
    internal readonly Token[] tokens;

    /// <summary>
    /// Stores the tokens that every created tokenizer will replay.
    /// </summary>
    public TokenArrayAnalyzer(params Token[] tokens)
    {
        this.tokens = tokens;
    }

    /// <summary>
    /// Wraps a new replaying tokenizer, built with <c>Token.TOKEN_ATTRIBUTE_FACTORY</c>,
    /// in <c>TokenStreamComponents</c>. <paramref name="fieldName"/> is not used.
    /// </summary>
    protected internal override TokenStreamComponents CreateComponents(String fieldName, TextReader reader)
    {
        return new TokenStreamComponents(
            new TokenizerAnonymousClass(Token.TOKEN_ATTRIBUTE_FACTORY, reader, tokens));
    }
}
}
}