blob: da2c2f9e837af08fd3168586a01ca6770e5e208a [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Search;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Util;
using NUnit.Framework;
using Attribute = Lucene.Net.Util.Attribute;
namespace Lucene.Net.Search.Vectorhighlight
{
[TestFixture]
public class IndexTimeSynonymTest : AbstractTestCase
{
/// <summary>
/// Index built by <c>MakeIndex1w</c>, searched with the single synonym term "Mac".
/// The term stack must contain exactly one entry that carries the offsets the
/// synonym token was indexed with (11-20).
/// </summary>
[Test]
public void TestFieldTermStackIndex1wSearch1term()
{
    MakeIndex1w();

    var fieldQuery = new FieldQuery(Tq("Mac"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    // Check the size first, then consume the only entry.
    Assert.AreEqual(1, termStack.termList.Count);
    var onlyTerm = termStack.Pop();
    Assert.AreEqual("Mac(11,20,3)", onlyTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex1w</c>, searched with "Mac" OR "MacBook".
/// Both synonym tokens were indexed at offsets 11-20, so the stack must hold
/// two entries; the order in which they are popped is not pinned by this test.
/// </summary>
[Test]
public void TestFieldTermStackIndex1wSearch2terms()
{
    MakeIndex1w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("Mac"), Occur.SHOULD);
    bq.Add(Tq("MacBook"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    Assert.AreEqual(2, stack.termList.Count);

    HashSet<string> expected = new HashSet<string>
    {
        "Mac(11,20,3)",
        "MacBook(11,20,3)"
    };

    // Remove (rather than a plain membership check) consumes each expected
    // entry, so the test fails if the same term is popped twice.
    Assert.IsTrue(expected.Remove(stack.Pop().ToString()));
    Assert.IsTrue(expected.Remove(stack.Pop().ToString()));
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with the original surface
/// term "pc". Exactly one stack entry is expected, at offsets 3-5.
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1term()
{
    MakeIndex1w2w();

    var fieldQuery = new FieldQuery(Tq("pc"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(1, termStack.termList.Count);
    var onlyTerm = termStack.Pop();
    Assert.AreEqual("pc(3,5,1)", onlyTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with the phrase
/// "personal computer". Both synonym tokens share the offsets of "pc" (3-5),
/// and they must be popped in phrase order.
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1phrase()
{
    MakeIndex1w2w();

    var fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(2, termStack.termList.Count);

    var firstTerm = termStack.Pop();
    Assert.AreEqual("personal(3,5,1)", firstTerm.ToString());

    var secondTerm = termStack.Pop();
    Assert.AreEqual("computer(3,5,2)", secondTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with only the second word
/// of the two-word synonym ("computer"). One stack entry is expected, still
/// reporting the offsets of the original "pc" token (3-5).
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1partial()
{
    MakeIndex1w2w();

    var fieldQuery = new FieldQuery(Tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(1, termStack.termList.Count);
    var onlyTerm = termStack.Pop();
    Assert.AreEqual("computer(3,5,2)", onlyTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with "pc" OR the phrase
/// "personal computer". Three stack entries are expected: "pc" and "personal"
/// (in either order, both at position 1) followed by "computer".
/// </summary>
[Test]
public void TestFieldTermStackIndex1w2wSearch1term1phrase()
{
    MakeIndex1w2w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("pc"), Occur.SHOULD);
    bq.Add(PqF("personal", "computer"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    Assert.AreEqual(3, stack.termList.Count);

    HashSet<string> expected = new HashSet<string>
    {
        "pc(3,5,1)",
        "personal(3,5,1)"
    };

    // Remove (rather than a plain membership check) consumes each expected
    // entry, so the test fails if the same term is popped twice.
    Assert.IsTrue(expected.Remove(stack.Pop().ToString()));
    Assert.IsTrue(expected.Remove(stack.Pop().ToString()));

    Assert.AreEqual("computer(3,5,2)", stack.Pop().ToString());
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with the injected one-word
/// synonym "pc". One stack entry is expected, spanning the offsets of the
/// whole "personal computer" text (3-20).
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1term()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(Tq("pc"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(1, termStack.termList.Count);
    var onlyTerm = termStack.Pop();
    Assert.AreEqual("pc(3,20,1)", onlyTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with the phrase
/// "personal computer". Both tokens were indexed with offsets 3-20 and must
/// be popped in phrase order.
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1phrase()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(2, termStack.termList.Count);

    var firstTerm = termStack.Pop();
    Assert.AreEqual("personal(3,20,1)", firstTerm.ToString());

    var secondTerm = termStack.Pop();
    Assert.AreEqual("computer(3,20,2)", secondTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with only "computer".
/// One stack entry is expected, carrying the shared offsets 3-20.
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1partial()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(Tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);

    Assert.AreEqual(1, termStack.termList.Count);
    var onlyTerm = termStack.Pop();
    Assert.AreEqual("computer(3,20,2)", onlyTerm.ToString());
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with "pc" OR the phrase
/// "personal computer". Three stack entries are expected: "pc" and "personal"
/// (in either order, both at position 1) followed by "computer".
/// </summary>
[Test]
public void TestFieldTermStackIndex2w1wSearch1term1phrase()
{
    MakeIndex2w1w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("pc"), Occur.SHOULD);
    bq.Add(PqF("personal", "computer"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    Assert.AreEqual(3, stack.termList.Count);

    HashSet<string> expected = new HashSet<string>
    {
        "pc(3,20,1)",
        "personal(3,20,1)"
    };

    // Remove (rather than a plain membership check) consumes each expected
    // entry, so the test fails if the same term is popped twice.
    Assert.IsTrue(expected.Remove(stack.Pop().ToString()));
    Assert.IsTrue(expected.Remove(stack.Pop().ToString()));

    Assert.AreEqual("computer(3,20,2)", stack.Pop().ToString());
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with the phrase
/// "personal computer". The phrase list must contain a single phrase whose
/// offsets are those of the original "pc" token (3-5).
/// </summary>
[Test]
public void TestFieldPhraseListIndex1w2wSearch1phrase()
{
    MakeIndex1w2w();

    var fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    // Only inspect the head after the count assertion has passed.
    var phrase = phrases.phraseList.First.Value;
    Assert.AreEqual("personalcomputer(1.0)((3,5))", phrase.ToString());
    Assert.AreEqual(3, phrase.StartOffset);
    Assert.AreEqual(5, phrase.EndOffset);
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with only "computer".
/// A single phrase is expected, at the offsets of the original "pc" token (3-5).
/// </summary>
[Test]
public void TestFieldPhraseListIndex1w2wSearch1partial()
{
    MakeIndex1w2w();

    var fieldQuery = new FieldQuery(Tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    // Only inspect the head after the count assertion has passed.
    var phrase = phrases.phraseList.First.Value;
    Assert.AreEqual("computer(1.0)((3,5))", phrase.ToString());
    Assert.AreEqual(3, phrase.StartOffset);
    Assert.AreEqual(5, phrase.EndOffset);
}
/// <summary>
/// Index built by <c>MakeIndex1w2w</c>, searched with "pc" OR the phrase
/// "personal computer". A single phrase at offsets 3-5 is expected. The test
/// does not pin which query text is reported for the match; it only checks
/// the "(boost)((start,end))" suffix of the string form.
/// </summary>
[Test]
public void TestFieldPhraseListIndex1w2wSearch1term1phrase()
{
    MakeIndex1w2w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("pc"), Occur.SHOULD);
    bq.Add(PqF("personal", "computer"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);

    Assert.AreEqual(1, fpl.phraseList.Count);

    // Ordinal search: the pattern is a fixed technical string, so the lookup
    // must not depend on the current culture. "> 0" (not ">= 0") requires
    // some match text to precede the suffix.
    Assert.IsTrue(fpl.phraseList.First.Value.ToString().IndexOf("(1.0)((3,5))", StringComparison.Ordinal) > 0);
    Assert.AreEqual(3, fpl.phraseList.First.Value.StartOffset);
    Assert.AreEqual(5, fpl.phraseList.First.Value.EndOffset);
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with the injected synonym
/// "pc". A single phrase is expected, spanning offsets 3-20.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1term()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(Tq("pc"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    // Only inspect the head after the count assertion has passed.
    var phrase = phrases.phraseList.First.Value;
    Assert.AreEqual("pc(1.0)((3,20))", phrase.ToString());
    Assert.AreEqual(3, phrase.StartOffset);
    Assert.AreEqual(20, phrase.EndOffset);
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with the phrase
/// "personal computer". A single phrase is expected, spanning offsets 3-20.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1phrase()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(PqF("personal", "computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    // Only inspect the head after the count assertion has passed.
    var phrase = phrases.phraseList.First.Value;
    Assert.AreEqual("personalcomputer(1.0)((3,20))", phrase.ToString());
    Assert.AreEqual(3, phrase.StartOffset);
    Assert.AreEqual(20, phrase.EndOffset);
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with only "computer".
/// A single phrase is expected, spanning offsets 3-20.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1partial()
{
    MakeIndex2w1w();

    var fieldQuery = new FieldQuery(Tq("computer"), true, true);
    var termStack = new FieldTermStack(reader, 0, F, fieldQuery);
    var phrases = new FieldPhraseList(termStack, fieldQuery);

    Assert.AreEqual(1, phrases.phraseList.Count);

    // Only inspect the head after the count assertion has passed.
    var phrase = phrases.phraseList.First.Value;
    Assert.AreEqual("computer(1.0)((3,20))", phrase.ToString());
    Assert.AreEqual(3, phrase.StartOffset);
    Assert.AreEqual(20, phrase.EndOffset);
}
/// <summary>
/// Index built by <c>MakeIndex2w1w</c>, searched with "pc" OR the phrase
/// "personal computer". A single phrase at offsets 3-20 is expected. The test
/// does not pin which query text is reported for the match; it only checks
/// the "(boost)((start,end))" suffix of the string form.
/// </summary>
[Test]
public void TestFieldPhraseListIndex2w1wSearch1term1phrase()
{
    MakeIndex2w1w();

    BooleanQuery bq = new BooleanQuery();
    bq.Add(Tq("pc"), Occur.SHOULD);
    bq.Add(PqF("personal", "computer"), Occur.SHOULD);

    FieldQuery fq = new FieldQuery(bq, true, true);
    FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
    FieldPhraseList fpl = new FieldPhraseList(stack, fq);

    Assert.AreEqual(1, fpl.phraseList.Count);

    // Ordinal search: the pattern is a fixed technical string, so the lookup
    // must not depend on the current culture. "> 0" (not ">= 0") requires
    // some match text to precede the suffix.
    Assert.IsTrue(fpl.phraseList.First.Value.ToString().IndexOf("(1.0)((3,20))", StringComparison.Ordinal) > 0);
    Assert.AreEqual(3, fpl.phraseList.First.Value.StartOffset);
    Assert.AreEqual(20, fpl.phraseList.First.Value.EndOffset);
}
/// <summary>
/// Builds the "one word -> one word" fixture: the text "I'll buy a Macintosh"
/// with the extra tokens "Mac" and "MacBook" stacked on "Macintosh"
/// (position increment 0, same offsets 11-20).
/// </summary>
private void MakeIndex1w()
{
    // Character offsets (tens digit on the first row) and word positions:
    //
    //           11111111112
    // 012345678901234567890
    // I'll buy a Macintosh
    //            Mac
    //            MacBook
    // 0    1   2 3
    Token[] tokens =
    {
        t("I'll", 0, 4),
        t("buy", 5, 8),
        t("a", 9, 10),
        t("Macintosh", 11, 20),
        t("Mac", 11, 20, 0),      // increment 0: same position as "Macintosh"
        t("MacBook", 11, 20, 0)   // increment 0: same position as "Macintosh"
    };

    MakeSynonymIndex("I'll buy a Macintosh", tokens);
}
/// <summary>
/// Builds the "one word -> two words" fixture: the text "My pc was broken"
/// where "pc" is additionally indexed as the two tokens "personal" and
/// "computer", both carrying the offsets of "pc" (3-5).
/// </summary>
private void MakeIndex1w2w()
{
    // Character offsets (tens digit on the first row) and the positions of
    // the surface words:
    //
    //           1111111
    // 01234567890123456
    // My pc was broken
    //    personal computer
    // 0  1  2   3
    //
    // "personal" has increment 0 (same position as "pc"); "computer" has the
    // default increment 1, so it takes the next position and the tokens that
    // follow it are shifted by one relative to the row above.
    Token[] tokens =
    {
        t("My", 0, 2),
        t("pc", 3, 5),
        t("personal", 3, 5, 0),
        t("computer", 3, 5),
        t("was", 6, 9),
        t("broken", 10, 16)
    };

    MakeSynonymIndex("My pc was broken", tokens);
}
/// <summary>
/// Builds the "two words -> one word" fixture: the text
/// "My personal computer was broken" where "personal" and "computer" are both
/// indexed with the offsets of the whole two-word span (3-20) and the
/// synonym "pc" is stacked on "personal".
/// </summary>
private void MakeIndex2w1w()
{
    // Character offsets (tens digit on the first row) and word positions:
    //
    //           1111111111222222222233
    // 01234567890123456789012345678901
    // My personal computer was broken
    //    pc
    // 0  1        2        3   4
    Token[] tokens =
    {
        t("My", 0, 2),
        t("personal", 3, 20),
        t("pc", 3, 20, 0),        // increment 0: same position as "personal"
        t("computer", 3, 20),
        t("was", 21, 24),
        t("broken", 25, 31)
    };

    MakeSynonymIndex("My personal computer was broken", tokens);
}
/// <summary>
/// Indexes <paramref name="value"/> through a <see cref="TokenArrayAnalyzer"/>
/// that emits exactly <paramref name="tokens"/>, by delegating to
/// <c>Make1dmfIndex</c> (defined outside this file).
/// </summary>
/// <param name="value">Field text handed to <c>Make1dmfIndex</c>.</param>
/// <param name="tokens">Pre-built tokens the analyzer will replay.</param>
private void MakeSynonymIndex(String value, params Token[] tokens)
{
    Make1dmfIndex(new TokenArrayAnalyzer(tokens), value);
}
/// <summary>
/// Creates a token with the given text and offsets and a position increment of 1.
/// </summary>
/// <param name="text">Token text.</param>
/// <param name="startOffset">Start character offset.</param>
/// <param name="endOffset">End character offset.</param>
/// <returns>The new token.</returns>
public static Token t(String text, int startOffset, int endOffset)
{
    const int defaultPositionIncrement = 1;
    return t(text, startOffset, endOffset, defaultPositionIncrement);
}
/// <summary>
/// Creates a token with the given text, offsets and position increment.
/// The fixtures in this class pass an increment of 0 to stack a synonym on
/// the preceding token.
/// </summary>
/// <param name="text">Token text.</param>
/// <param name="startOffset">Start character offset.</param>
/// <param name="endOffset">End character offset.</param>
/// <param name="positionIncrement">Value assigned to <c>PositionIncrement</c>.</param>
/// <returns>The new token.</returns>
public static Token t(String text, int startOffset, int endOffset, int positionIncrement)
{
    return new Token(text, startOffset, endOffset)
    {
        PositionIncrement = positionIncrement
    };
}
/// <summary>
/// Analyzer that ignores the field name and reader it is given and instead
/// replays a fixed array of pre-built tokens. Every call to
/// <see cref="TokenStream"/> returns a fresh stream that starts at the first token.
/// </summary>
public class TokenArrayAnalyzer : Analyzer
{
    // Tokens to replay; the array is stored by reference, not copied.
    private readonly Token[] sourceTokens;

    /// <summary>Creates an analyzer that will emit <paramref name="tokens"/> in order.</summary>
    public TokenArrayAnalyzer(Token[] tokens)
    {
        sourceTokens = tokens;
    }

    /// <summary>
    /// Returns a new stream over the stored tokens; both arguments are unused.
    /// </summary>
    public override TokenStream TokenStream(String fieldName, System.IO.TextReader reader)
    {
        return new ArrayTokenStream(sourceTokens);
    }

    /// <summary>
    /// Token stream that copies one stored token per <see cref="IncrementToken"/>
    /// call into its own attribute instance.
    /// </summary>
    private class ArrayTokenStream : TokenStream
    {
        private readonly Token[] tokens;

        // Target of Token.CopyTo. NOTE(review): presumably TOKEN_ATTRIBUTE_FACTORY
        // backs every attribute with a single Token instance, which is why the
        // ITermAttribute can be cast to Attribute and receive a full token copy — confirm.
        private readonly Attribute target;

        // Index of the next token to emit.
        private int next = 0;

        public ArrayTokenStream(Token[] tokens)
            : base(Token.TOKEN_ATTRIBUTE_FACTORY)
        {
            this.tokens = tokens;
            this.target = (Attribute)AddAttribute<ITermAttribute>();
        }

        /// <summary>
        /// Emits the next stored token, or returns false once all have been emitted.
        /// </summary>
        public override bool IncrementToken()
        {
            if (next < tokens.Length)
            {
                ClearAttributes();
                tokens[next++].CopyTo(target);
                return true;
            }

            return false;
        }

        protected override void Dispose(bool disposing)
        {
            // Nothing to release.
        }
    }
}
}
}