// blob: 7bc2f14906d161fa6cf9e4a3ae4bbf41eeda8bfc [file] [log] [blame]
using J2N.Text;
using J2N.Threading;
using J2N.Threading.Atomic;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using Console = Lucene.Net.Util.SystemConsole;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search.Suggest.Analyzing
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Test requires postings offsets:
[SuppressCodecs("Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom")]
public class AnalyzingInfixSuggesterTest : LuceneTestCase
{
/// <summary>
/// Builds a suggester from two inputs and checks the highlighted results of
/// looking up "ear", "ear " (with a trailing space), "pen" and "p".
/// </summary>
[Test]
public void TestBasic()
{
    var inputs = new Input[]
    {
        new Input("lend me your ear", 8, new BytesRef("foobar")),
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        suggester.Build(new InputArrayEnumerator(inputs));

        // Every lookup in this test asks for up to 10 results, requires all terms and highlights.
        Func<string, IList<Lookup.LookupResult>> lookup =
            text => suggester.DoLookup(TestUtil.StringToCharSequence(text, Random).ToString(), 10, true, true);

        // "ear": both inputs are expected, the weight-10 entry first.
        IList<Lookup.LookupResult> hits = lookup("ear");
        assertEquals(2, hits.Count);
        assertEquals("a penny saved is a penny <b>ear</b>ned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);
        assertEquals("lend me your <b>ear</b>", hits[1].Key);
        assertEquals(8, hits[1].Value);
        assertEquals(new BytesRef("foobar"), hits[1].Payload);

        // "ear " (trailing space): only the input containing the whole word "ear" is expected.
        hits = lookup("ear ");
        assertEquals(1, hits.Count);
        assertEquals("lend me your <b>ear</b>", hits[0].Key);
        assertEquals(8, hits[0].Value);
        assertEquals(new BytesRef("foobar"), hits[0].Payload);

        // "pen": both occurrences of "penny" are expected to be highlighted.
        hits = lookup("pen");
        assertEquals(1, hits.Count);
        assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);

        // "p": a single-character prefix is expected to match as well.
        hits = lookup("p");
        assertEquals(1, hits.Count);
        assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);
    }
}
/// <summary>
/// Builds a suggester in a temp directory, disposes it, then opens a new
/// suggester over the same directory and checks lookups and the entry count.
/// </summary>
[Test]
public void TestAfterLoad()
{
    var inputs = new Input[]
    {
        new Input("lend me your ear", 8, new BytesRef("foobar")),
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    DirectoryInfo indexDir = CreateTempDir("AnalyzingInfixSuggesterTest");
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(indexDir), analyzer, analyzer, 3);
    try
    {
        suggester.Build(new InputArrayEnumerator(inputs));
        assertEquals(2, suggester.Count);
        suggester.Dispose();

        // Open a second suggester over the same directory; Build is not called again.
        suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(indexDir), analyzer, analyzer, 3);
        string query = TestUtil.StringToCharSequence("ear", Random).ToString();
        IList<Lookup.LookupResult> hits = suggester.DoLookup(query, 10, true, true);
        assertEquals(2, hits.Count);

        Lookup.LookupResult top = hits[0];
        assertEquals("a penny saved is a penny <b>ear</b>ned", top.Key);
        assertEquals(10, top.Value);
        assertEquals(new BytesRef("foobaz"), top.Payload);
        assertEquals(2, suggester.Count);
    }
    finally
    {
        // Disposes whichever instance the variable currently refers to.
        suggester.Dispose();
    }
}
/// <summary>
/// One piece of a highlighted suggestion, used to return the highlighted
/// result as an object; see <see cref="Lookup.LookupResult.HighlightKey"/>.
/// </summary>
private sealed class LookupHighlightFragment
{
    /// <summary>Portion of text for this fragment.</summary>
    public readonly string text;

    /// <summary>True if this text matched a part of the user's query.</summary>
    public readonly bool isHit;

    /// <summary>Sole constructor.</summary>
    /// <param name="text">Text covered by the fragment.</param>
    /// <param name="isHit">Whether the fragment is a hit.</param>
    public LookupHighlightFragment(string text, bool isHit)
    {
        this.isHit = isHit;
        this.text = text;
    }

    /// <summary>Returns a debug representation containing both fields.</summary>
    public override string ToString()
    {
        string description = "LookupHighlightFragment(text=" + text;
        description = description + " isHit=" + isHit;
        return description + ")";
    }
}
/// <summary>
/// Suggester whose <c>Highlight</c> override returns a list of
/// <see cref="LookupHighlightFragment"/> instead of a marked-up string.
/// </summary>
internal class TestHighlightAnalyzingInfixSuggester : AnalyzingInfixSuggester
{
    /// <summary>
    /// Creates the suggester over a new test directory, using <paramref name="a"/> as both
    /// analyzers. <paramref name="outerInstance"/> is accepted but not used.
    /// </summary>
    public TestHighlightAnalyzingInfixSuggester(AnalyzingInfixSuggesterTest outerInstance, Analyzer a)
        : base(TEST_VERSION_CURRENT, NewDirectory(), a, a, 3)
    {
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> with the query analyzer and splits it into hit and
    /// non-hit fragments according to <paramref name="matchedTokens"/> and <paramref name="prefixToken"/>.
    /// </summary>
    /// <returns>A <c>List&lt;LookupHighlightFragment&gt;</c> boxed as <c>object</c>.</returns>
    protected internal override object Highlight(string text, ICollection<string> matchedTokens, string prefixToken)
    {
        TokenStream stream = m_queryAnalyzer.GetTokenStream("text", new StringReader(text));
        try
        {
            ICharTermAttribute termAttribute = stream.AddAttribute<ICharTermAttribute>();
            IOffsetAttribute offsetAttribute = stream.AddAttribute<IOffsetAttribute>();
            stream.Reset();

            var pieces = new List<LookupHighlightFragment>();
            int consumed = 0; // offset in text up to which fragments have been emitted

            while (stream.IncrementToken())
            {
                string term = termAttribute.toString();
                int start = offsetAttribute.StartOffset;
                int end = offsetAttribute.EndOffset;

                if (consumed > start)
                {
                    // Token begins inside text that was already emitted: skip it.
                    continue;
                }

                if (consumed < start)
                {
                    // Emit the gap before this token as a non-hit fragment.
                    pieces.Add(new LookupHighlightFragment(text.Substring(consumed, start - consumed), false));
                    consumed = start;
                }

                if (matchedTokens.Contains(term))
                {
                    // Whole token matches.
                    pieces.Add(new LookupHighlightFragment(text.Substring(start, end - start), true));
                    consumed = end;
                }
                else if (prefixToken != null && term.StartsWith(prefixToken, StringComparison.Ordinal))
                {
                    // Only the leading prefix is a hit; the remainder of the token is not.
                    int prefixLength = prefixToken.Length;
                    pieces.Add(new LookupHighlightFragment(text.Substring(start, prefixLength), true));
                    if (prefixLength < term.Length)
                    {
                        pieces.Add(new LookupHighlightFragment(text.Substring(start + prefixLength, term.Length - prefixLength), false));
                    }
                    consumed = end;
                }
            }

            stream.End();
            int finalOffset = offsetAttribute.EndOffset;
            if (consumed < finalOffset)
            {
                // Whatever text is left over becomes one trailing non-hit fragment.
                pieces.Add(new LookupHighlightFragment(text.Substring(consumed), false));
            }
            return pieces;
        }
        finally
        {
            IOUtils.DisposeWhileHandlingException(stream);
        }
    }
}
/// <summary>
/// Checks that a suggester overriding <c>Highlight</c> exposes its fragment list through
/// <c>HighlightKey</c>, and that rendering the fragments gives the expected markup.
/// </summary>
[Test]
public void TestHighlightAsObject()
{
    var inputs = new Input[]
    {
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (AnalyzingInfixSuggester suggester = new TestHighlightAnalyzingInfixSuggester(this, analyzer))
    {
        suggester.Build(new InputArrayEnumerator(inputs));

        string query = TestUtil.StringToCharSequence("ear", Random).ToString();
        IList<Lookup.LookupResult> hits = suggester.DoLookup(query, 10, true, true);
        assertEquals(1, hits.Count);

        Lookup.LookupResult only = hits[0];
        var fragments = (List<LookupHighlightFragment>)only.HighlightKey;
        assertEquals("a penny saved is a penny <b>ear</b>ned", ToString(fragments));
        assertEquals(10, only.Value);
        assertEquals(new BytesRef("foobaz"), only.Payload);
    }
}
/// <summary>
/// Concatenates the fragment texts, wrapping every hit fragment in
/// <c>&lt;b&gt;</c> ... <c>&lt;/b&gt;</c>.
/// </summary>
private string ToString(IEnumerable<LookupHighlightFragment> fragments)
{
    var markup = new StringBuilder();
    foreach (LookupHighlightFragment piece in fragments)
    {
        if (piece.isHit)
        {
            markup.Append("<b>").Append(piece.text).Append("</b>");
        }
        else
        {
            markup.Append(piece.text);
        }
    }
    return markup.ToString();
}
/// <summary>
/// Runs the basic lookups with a randomly chosen minimum prefix length (0..9), both with and
/// without highlighting, and repeats them after the suggester has been disposed and reopened.
/// </summary>
[Test]
public void TestRandomMinPrefixLength()
{
    var inputs = new Input[]
    {
        new Input("lend me your ear", 8, new BytesRef("foobar")),
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    DirectoryInfo indexDir = CreateTempDir("AnalyzingInfixSuggesterTest");
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    int minPrefixLength = Random.nextInt(10);
    var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(indexDir), analyzer, analyzer, minPrefixLength);
    try
    {
        suggester.Build(new InputArrayEnumerator(inputs));
        for (int round = 0; round < 2; round++)
        {
            for (int mode = 0; mode < 2; mode++)
            {
                // First pass highlights, second pass returns the plain surface form.
                bool doHighlight = mode == 0;
                const string plainPenny = "a penny saved is a penny earned";
                const string plainLend = "lend me your ear";

                IList<Lookup.LookupResult> hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random).ToString(), 10, true, doHighlight);
                assertEquals(2, hits.Count);
                assertEquals(doHighlight ? "a penny saved is a penny <b>ear</b>ned" : plainPenny, hits[0].Key);
                assertEquals(10, hits[0].Value);
                assertEquals(doHighlight ? "lend me your <b>ear</b>" : plainLend, hits[1].Key);
                assertEquals(new BytesRef("foobaz"), hits[0].Payload);
                assertEquals(8, hits[1].Value);
                assertEquals(new BytesRef("foobar"), hits[1].Payload);

                hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear ", Random).ToString(), 10, true, doHighlight);
                assertEquals(1, hits.Count);
                assertEquals(doHighlight ? "lend me your <b>ear</b>" : plainLend, hits[0].Key);
                assertEquals(8, hits[0].Value);
                assertEquals(new BytesRef("foobar"), hits[0].Payload);

                hits = suggester.DoLookup(TestUtil.StringToCharSequence("pen", Random).ToString(), 10, true, doHighlight);
                assertEquals(1, hits.Count);
                assertEquals(doHighlight ? "a <b>pen</b>ny saved is a <b>pen</b>ny earned" : plainPenny, hits[0].Key);
                assertEquals(10, hits[0].Value);
                assertEquals(new BytesRef("foobaz"), hits[0].Payload);

                hits = suggester.DoLookup(TestUtil.StringToCharSequence("p", Random).ToString(), 10, true, doHighlight);
                assertEquals(1, hits.Count);
                assertEquals(doHighlight ? "a <b>p</b>enny saved is a <b>p</b>enny earned" : plainPenny, hits[0].Key);
                assertEquals(10, hits[0].Value);
                assertEquals(new BytesRef("foobaz"), hits[0].Payload);
            }

            // Make sure things still work after close and reopen:
            suggester.Dispose();
            suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(indexDir), analyzer, analyzer, minPrefixLength);
        }
    }
    finally
    {
        suggester.Dispose();
    }
}
/// <summary>
/// Checks that looking up "penn" highlights that prefix in both occurrences of "penny".
/// </summary>
[Test]
public void TestHighlight()
{
    var inputs = new Input[]
    {
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        suggester.Build(new InputArrayEnumerator(inputs));

        string query = TestUtil.StringToCharSequence("penn", Random).ToString();
        IList<Lookup.LookupResult> hits = suggester.DoLookup(query, 10, true, true);

        assertEquals(1, hits.Count);
        assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", hits[0].Key);
    }
}
/// <summary>
/// Suggester whose <c>AddPrefixMatch</c> override wraps the entire surface token in
/// bold tags rather than only the matched prefix.
/// </summary>
internal class TestHighlightChangeCaseAnalyzingInfixSuggester : AnalyzingInfixSuggester
{
    // Owning test instance; stored by the constructor and not otherwise read in this class.
    private readonly AnalyzingInfixSuggesterTest outerInstance;

    /// <summary>
    /// Creates the suggester over a new test directory, using <paramref name="a"/> as both analyzers.
    /// </summary>
    public TestHighlightChangeCaseAnalyzingInfixSuggester(AnalyzingInfixSuggesterTest outerInstance, Analyzer a)
        : base(TEST_VERSION_CURRENT, NewDirectory(), a, a, 3)
    {
        this.outerInstance = outerInstance;
    }

    /// <summary>
    /// Appends <paramref name="surface"/> to <paramref name="sb"/> surrounded by
    /// <c>&lt;b&gt;</c> and <c>&lt;/b&gt;</c>; the analyzed form and prefix token are ignored.
    /// </summary>
    protected internal override void AddPrefixMatch(StringBuilder sb, string surface, string analyzed, string prefixToken)
    {
        sb.Append("<b>").Append(surface).Append("</b>");
    }
}
/// <summary>
/// Checks highlighting when the analyzer is created with its third argument set to true while
/// the surface form contains an upper-case letter: first with the stock suggester, then with
/// one that highlights the whole matched token.
/// </summary>
[Test]
public void TestHighlightCaseChange()
{
    var inputs = new Input[]
    {
        new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, true);
    IList<Lookup.LookupResult> hits;

    using (var plain = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        plain.Build(new InputArrayEnumerator(inputs));
        hits = plain.DoLookup(TestUtil.StringToCharSequence("penn", Random).ToString(), 10, true, true);
        assertEquals(1, hits.Count);
        assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", hits[0].Key);
    }

    // Try again, but overriding AddPrefixMatch to highlight
    // the entire hit:
    using (var wholeToken = new TestHighlightChangeCaseAnalyzingInfixSuggester(this, analyzer))
    {
        wholeToken.Build(new InputArrayEnumerator(inputs));
        hits = wholeToken.DoLookup(TestUtil.StringToCharSequence("penn", Random).ToString(), 10, true, true);
        assertEquals(1, hits.Count);
        assertEquals("a <b>Penny</b> saved is a <b>penny</b> earned", hits[0].Key);
    }
}
/// <summary>
/// Disposes the suggester explicitly inside its <c>using</c> block, so that Dispose
/// is invoked a second time when the block exits.
/// </summary>
[Test]
public void TestDoubleClose()
{
    var inputs = new Input[]
    {
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
    };
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        suggester.Build(new InputArrayEnumerator(inputs));

        // First dispose; the using statement performs the second one.
        suggester.Dispose();
    }
}
/// <summary>
/// Indexes with a <c>StopFilter</c> and queries with a <c>SuggestStopFilter</c>, both using
/// "a" as the only stop word, and checks that the query "a" still suggests
/// "a bob for apples" with the leading "a" of "apples" highlighted.
/// </summary>
[Test]
public void TestSuggestStopFilter()
{
    CharArraySet stopSet = StopFilter.MakeStopSet(TEST_VERSION_CURRENT, "a");

    Analyzer indexAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        MockTokenizer tokenizer = new MockTokenizer(reader);
        TokenStream filtered = new StopFilter(TEST_VERSION_CURRENT, tokenizer, stopSet);
        return new TokenStreamComponents(tokenizer, filtered);
    });

    Analyzer queryAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        MockTokenizer tokenizer = new MockTokenizer(reader);
        TokenStream filtered = new SuggestStopFilter(tokenizer, stopSet);
        return new TokenStreamComponents(tokenizer, filtered);
    });

    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), indexAnalyzer, queryAnalyzer, 3))
    {
        var inputs = new Input[]
        {
            new Input("a bob for apples", 10, new BytesRef("foobaz")),
        };
        suggester.Build(new InputArrayEnumerator(inputs));

        string query = TestUtil.StringToCharSequence("a", Random).ToString();
        IList<Lookup.LookupResult> hits = suggester.DoLookup(query, 10, true, true);

        assertEquals(1, hits.Count);
        assertEquals("a bob for <b>a</b>pples", hits[0].Key);
    }
}
/// <summary>
/// Builds the suggester from an empty input, adds two suggestions afterwards, refreshes,
/// and then runs the same lookups as the basic test.
/// </summary>
[Test]
public void TestEmptyAtStart()
{
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        suggester.Build(new InputArrayEnumerator(new Input[0]));
        suggester.Add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));
        suggester.Add(new BytesRef("lend me your ear"), null, 8, new BytesRef("foobar"));
        suggester.Refresh();

        // Up to 10 results, all terms required, highlighting on.
        Func<string, IList<Lookup.LookupResult>> lookup =
            text => suggester.DoLookup(TestUtil.StringToCharSequence(text, Random).ToString(), 10, true, true);

        IList<Lookup.LookupResult> hits = lookup("ear");
        assertEquals(2, hits.Count);
        assertEquals("a penny saved is a penny <b>ear</b>ned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);
        assertEquals("lend me your <b>ear</b>", hits[1].Key);
        assertEquals(8, hits[1].Value);
        assertEquals(new BytesRef("foobar"), hits[1].Payload);

        hits = lookup("ear ");
        assertEquals(1, hits.Count);
        assertEquals("lend me your <b>ear</b>", hits[0].Key);
        assertEquals(8, hits[0].Value);
        assertEquals(new BytesRef("foobar"), hits[0].Payload);

        hits = lookup("pen");
        assertEquals(1, hits.Count);
        assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);

        hits = lookup("p");
        assertEquals(1, hits.Count);
        assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);
    }
}
/// <summary>
/// Checks a two-term query ("pen p") against "the pen is pretty": the expected key
/// highlights the whole word "pen" and the leading "p" of "pretty".
/// </summary>
[Test]
public void TestBothExactAndPrefix()
{
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        suggester.Build(new InputArrayEnumerator(new Input[0]));
        suggester.Add(new BytesRef("the pen is pretty"), null, 10, new BytesRef("foobaz"));
        suggester.Refresh();

        string query = TestUtil.StringToCharSequence("pen p", Random).ToString();
        IList<Lookup.LookupResult> hits = suggester.DoLookup(query, 10, true, true);
        assertEquals(1, hits.Count);

        Lookup.LookupResult only = hits[0];
        assertEquals("the <b>pen</b> is <b>p</b>retty", only.Key);
        assertEquals(10, only.Value);
        assertEquals(new BytesRef("foobaz"), only.Payload);
    }
}
/// <summary>
/// Produces between one and four random simple strings (each requested with bounds 1 and 10)
/// joined by single spaces.
/// </summary>
private static string RandomText()
{
    int wordCount = TestUtil.NextInt32(Random, 1, 4);
    var words = new string[wordCount];
    for (int w = 0; w < wordCount; w++)
    {
        words[w] = TestUtil.RandomSimpleString(Random, 1, 10);
    }
    return string.Join(" ", words);
}
/// <summary>
/// A weight change that has been sent to the suggester but is not yet reflected
/// in the test's own list of expected inputs.
/// </summary>
private class Update
{
    /// <summary>Position, in the test's input list, of the entry being updated.</summary>
    internal int index;

    /// <summary>New weight for that entry.</summary>
    internal long weight;
}
/// <summary>
/// Background job that repeatedly runs random lookups against a suggester until the
/// shared stop flag becomes true. The first exception thrown by a lookup is stored in
/// slot 0 of the shared error array and also sets the stop flag.
/// </summary>
private class LookupThread : ThreadJob
{
    private readonly AnalyzingInfixSuggesterTest outerInstance;
    private readonly AnalyzingInfixSuggester suggester;
    private readonly AtomicBoolean stop;
    private Exception[] error;

    /// <summary>Stores the collaborators; nothing runs until the job is started.</summary>
    public LookupThread(AnalyzingInfixSuggesterTest outerInstance, AnalyzingInfixSuggester suggester, AtomicBoolean stop, Exception[] error)
    {
        this.error = error;
        this.stop = stop;
        this.suggester = suggester;
        this.outerInstance = outerInstance;
    }

    /// <summary>Lookup loop; exits once the stop flag is observed as true.</summary>
    public override void Run()
    {
#if FEATURE_THREAD_PRIORITY
        Priority += 1;
#endif
        while (stop.Value == false)
        {
            string randomQuery = RandomText();
            int limit = TestUtil.NextInt32(Random, 1, 100);
            bool requireAllTerms = Random.nextBoolean();
            bool highlight = Random.nextBoolean();

            // Results are deliberately not verified: the point is only to run
            // lookups at the same time as adds/updates to surface thread hazards.
            try
            {
                string key = TestUtil.StringToCharSequence(randomQuery, Random).ToString();
                suggester.DoLookup(key, limit, requireAllTerms, highlight);
                Thread.Sleep(10);// don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
            }
            catch (Exception failure)
            {
                error[0] = failure;
                stop.Value = true;
            }
        }
    }
}
/// <summary>
/// Returns the stack trace of <paramref name="error"/> as a string so that an assertion made
/// outside the thread that threw it can include the trace in its failure message.
/// Also calls <c>printStackTrace()</c> on the exception. Returns an empty string for null.
/// </summary>
private string stackTraceStr(Exception error)
{
    if (error != null)
    {
        error.printStackTrace();
        return error.StackTrace;
    }
    return "";
}
/// <summary>
/// Orders inputs by their <c>v</c> field in descending order (largest first).
/// </summary>
internal class TestRandomNRTComparer : IComparer<Input>
{
    /// <summary>
    /// Returns -1 when <paramref name="a"/> has the larger <c>v</c>, 1 when it has the
    /// smaller one, and 0 when both are equal.
    /// </summary>
    public int Compare(Input a, Input b)
    {
        return a.v > b.v ? -1
             : a.v < b.v ? 1
             : 0;
    }
}
/// <summary>
/// Randomized near-real-time test. The main thread adds and updates suggestions,
/// occasionally refreshes or closes/reopens the suggester, and compares lookups against a
/// brute-force model of the visible inputs, while a <see cref="LookupThread"/> runs random
/// lookups in the background.
/// </summary>
/// <remarks>
/// The stop flag is shared by every lookup thread this test creates, so it is reset to
/// false before each replacement thread is started; otherwise that thread would exit
/// immediately. The lookup thread is also stopped and joined before the suggester is
/// disposed, including when an assertion fails.
/// </remarks>
[Test]
[Slow]
public void TestRandomNRT()
{
    DirectoryInfo tempDir = CreateTempDir("AnalyzingInfixSuggesterTest");
    Analyzer a = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    int minPrefixChars = Random.nextInt(7);
    if (Verbose)
    {
        Console.WriteLine(" minPrefixChars=" + minPrefixChars);
    }
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, minPrefixChars);

    // Declared outside the try block so the finally clause can always stop the background thread.
    var stop = new AtomicBoolean(false);
    Exception[] error = new Exception[] { null };
    LookupThread lookupThread = null;
    try
    {
        // Initial suggester built with nothing:
        suggester.Build(new InputArrayEnumerator(new Input[0]));
        lookupThread = new LookupThread(this, suggester, stop, error);
        lookupThread.Start();
        int iters = AtLeast(1000);
        int visibleUpto = 0;
        ISet<long> usedWeights = new JCG.HashSet<long>();
        ISet<string> usedKeys = new JCG.HashSet<string>();
        List<Input> inputs = new List<Input>();
        List<Update> pendingUpdates = new List<Update>();
        for (int iter = 0; iter < iters; iter++)
        {
            // Pick a text that has never been used as a key.
            string text;
            while (true)
            {
                text = RandomText();
                if (usedKeys.contains(text) == false)
                {
                    usedKeys.add(text);
                    break;
                }
            }
            // Carefully pick a weight we never used, to sidestep
            // tie-break problems:
            long weight;
            while (true)
            {
                weight = Random.nextInt(10 * iters);
                if (usedWeights.contains(weight) == false)
                {
                    usedWeights.add(weight);
                    break;
                }
            }
            if (inputs.size() > 0 && Random.nextInt(4) == 1)
            {
                // Update an existing suggestion
                Update update = new Update();
                update.index = Random.nextInt(inputs.size());
                update.weight = weight;
                Input input = inputs[update.index];
                pendingUpdates.Add(update);
                if (Verbose)
                {
                    Console.WriteLine("TEST: iter=" + iter + " update input=" + input.term.Utf8ToString() + "/" + weight);
                }
                suggester.Update(input.term, null, weight, input.term);
            }
            else
            {
                // Add a new suggestion
                inputs.Add(new Input(text, weight, new BytesRef(text)));
                if (Verbose)
                {
                    Console.WriteLine("TEST: iter=" + iter + " add input=" + text + "/" + weight);
                }
                BytesRef br = new BytesRef(text);
                suggester.Add(br, null, weight, br);
            }
            if (Random.nextInt(15) == 7)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: now refresh suggester");
                }
                suggester.Refresh();
                visibleUpto = inputs.size();
                ApplyPendingUpdates(inputs, pendingUpdates);
            }
            if (Random.nextInt(50) == 7)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: now close/reopen suggester");
                }
                // Stop the current lookup thread before its suggester is disposed.
                stop.Value = true;
                lookupThread.Join();
                Assert.Null(error[0], "Unexpcted exception at retry : \n" + stackTraceStr(error[0]));
                suggester.Dispose();
                suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(tempDir), a, a, minPrefixChars);
                // Reset the shared flag: the replacement thread checks it before its first
                // lookup and would otherwise terminate without doing any work.
                stop.Value = false;
                lookupThread = new LookupThread(this, suggester, stop, error);
                lookupThread.Start();
                visibleUpto = inputs.size();
                ApplyPendingUpdates(inputs, pendingUpdates);
            }
            if (visibleUpto > 0)
            {
                string query = RandomText();
                bool lastPrefix = Random.nextInt(5) != 1;
                if (lastPrefix == false)
                {
                    // A trailing space turns the last term into an exact-match term.
                    query += " ";
                }
                string[] queryTerms = Regex.Split(query, "\\s", RegexOptions.Compiled).TrimEnd();
                bool allTermsRequired = Random.nextInt(10) == 7;
                bool doHilite = Random.nextBoolean();
                if (Verbose)
                {
                    Console.WriteLine("TEST: lookup \"" + query + "\" allTermsRequired=" + allTermsRequired + " doHilite=" + doHilite);
                }
                // Stupid slow but hopefully correct matching:
                List<Input> expected = new List<Input>();
                for (int i = 0; i < visibleUpto; i++)
                {
                    Input input = inputs[i];
                    string[] inputTerms = Regex.Split(input.term.Utf8ToString(), "\\s", RegexOptions.Compiled).TrimEnd();
                    bool match = false;
                    for (int j = 0; j < queryTerms.Length; j++)
                    {
                        if (j < queryTerms.Length - 1 || lastPrefix == false)
                        {
                            // Exact match
                            for (int k = 0; k < inputTerms.Length; k++)
                            {
                                if (inputTerms[k].Equals(queryTerms[j], StringComparison.Ordinal))
                                {
                                    match = true;
                                    break;
                                }
                            }
                        }
                        else
                        {
                            // Prefix match
                            for (int k = 0; k < inputTerms.Length; k++)
                            {
                                if (inputTerms[k].StartsWith(queryTerms[j], StringComparison.Ordinal))
                                {
                                    match = true;
                                    break;
                                }
                            }
                        }
                        if (match)
                        {
                            if (allTermsRequired == false)
                            {
                                // At least one query term does match:
                                break;
                            }
                            // Cleared so the next query term is judged on its own; the flag
                            // set by the final term is what survives the loop.
                            match = false;
                        }
                        else if (allTermsRequired)
                        {
                            // At least one query term does not match:
                            break;
                        }
                    }
                    if (match)
                    {
                        if (doHilite)
                        {
                            expected.Add(new Input(Hilite(lastPrefix, inputTerms, queryTerms), input.v, input.term));
                        }
                        else
                        {
                            expected.Add(input);
                        }
                    }
                }
                expected.Sort(new TestRandomNRTComparer());
                if (expected.Count > 0)
                {
                    int topN = TestUtil.NextInt32(Random, 1, expected.size());
                    IList<Lookup.LookupResult> actual = suggester.DoLookup(TestUtil.StringToCharSequence(query, Random).ToString(), topN, allTermsRequired, doHilite);
                    int expectedCount = Math.Min(topN, expected.size());
                    if (Verbose)
                    {
                        Console.WriteLine(" expected:");
                        for (int i = 0; i < expectedCount; i++)
                        {
                            Input x = expected[i];
                            Console.WriteLine(" " + x.term.Utf8ToString() + "/" + x.v);
                        }
                        Console.WriteLine(" actual:");
                        foreach (Lookup.LookupResult result in actual)
                        {
                            Console.WriteLine(" " + result);
                        }
                    }
                    assertEquals(expectedCount, actual.size());
                    for (int i = 0; i < expectedCount; i++)
                    {
                        assertEquals(expected[i].term.Utf8ToString(), actual[i].Key.toString());
                        assertEquals(expected[i].v, actual[i].Value);
                        assertEquals(expected[i].payload, actual[i].Payload);
                    }
                }
                else
                {
                    if (Verbose)
                    {
                        Console.WriteLine(" no expected matches");
                    }
                }
            }
        }
        // Stop the background thread and surface any exception it recorded.
        stop.Value = true;
        lookupThread.Join();
        Assert.Null(error[0], "Unexpcted exception at retry : \n" + stackTraceStr(error[0]));
    }
    finally
    {
        try
        {
            // Make sure no lookup is in flight when the suggester goes away, even if an
            // assertion above failed. Joining an already finished thread returns at once.
            stop.Value = true;
            if (lookupThread != null)
            {
                lookupThread.Join();
            }
        }
        finally
        {
            suggester.Dispose();
        }
    }
}

/// <summary>
/// Replaces every input targeted by a pending update with a copy carrying the updated
/// weight (term and payload unchanged), then clears the pending list.
/// </summary>
private static void ApplyPendingUpdates(List<Input> inputs, List<Update> pendingUpdates)
{
    foreach (Update update in pendingUpdates)
    {
        Input oldInput = inputs[update.index];
        Input newInput = new Input(oldInput.term, update.weight, oldInput.payload);
        inputs[update.index] = newInput;
    }
    pendingUpdates.Clear();
}
/// <summary>
/// Deliberately simple reference highlighter used to compute expected keys.
/// Input terms are joined with single spaces. Each input term is checked against the query
/// terms in order and the first one that matches decides the markup: a whole-term match
/// wraps the term in bold tags; a prefix match wraps only the prefix.
/// </summary>
/// <param name="lastPrefix">When true, the last query term matches as a prefix; all other
/// query terms (and the last one when false) must equal the input term.</param>
/// <param name="inputTerms">Terms of the suggestion text.</param>
/// <param name="queryTerms">Terms of the query.</param>
/// <returns>The suggestion text with matches marked up.</returns>
private static string Hilite(bool lastPrefix, string[] inputTerms, string[] queryTerms)
{
    var output = new StringBuilder();
    int lastQueryIndex = queryTerms.Length - 1;

    for (int t = 0; t < inputTerms.Length; t++)
    {
        if (t != 0)
        {
            output.Append(' ');
        }

        string term = inputTerms[t];
        bool highlighted = false;

        for (int q = 0; q < queryTerms.Length && !highlighted; q++)
        {
            string wanted = queryTerms[q];
            bool wholeTermOnly = q < lastQueryIndex || !lastPrefix;

            if (wholeTermOnly)
            {
                if (term.Equals(wanted, StringComparison.Ordinal))
                {
                    output.Append("<b>").Append(term).Append("</b>");
                    highlighted = true;
                }
            }
            else if (term.StartsWith(wanted, StringComparison.Ordinal))
            {
                // Bold the prefix, then append the rest of the term unmarked.
                output.Append("<b>").Append(wanted).Append("</b>");
                output.Append(term.Substring(wanted.Length));
                highlighted = true;
            }
        }

        if (!highlighted)
        {
            output.Append(term);
        }
    }
    return output.ToString();
}
/// <summary>
/// Checks near-real-time behavior: a suggestion added after the build, and a later
/// weight/payload update, are each looked up after calling Refresh.
/// </summary>
[Test]
public void TestBasicNRT()
{
    var inputs = new Input[]
    {
        new Input("lend me your ear", 8, new BytesRef("foobar")),
    };
    Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
    using (var suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewDirectory(), analyzer, analyzer, 3))
    {
        suggester.Build(new InputArrayEnumerator(inputs));

        // Up to 10 results, all terms required, highlighting on.
        Func<string, IList<Lookup.LookupResult>> lookup =
            text => suggester.DoLookup(TestUtil.StringToCharSequence(text, Random).ToString(), 10, true, true);

        IList<Lookup.LookupResult> hits = lookup("ear");
        assertEquals(1, hits.Count);
        assertEquals("lend me your <b>ear</b>", hits[0].Key);
        assertEquals(8, hits[0].Value);
        assertEquals(new BytesRef("foobar"), hits[0].Payload);

        // Add a new suggestion:
        suggester.Add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));

        // Must refresh to see any newly added suggestions:
        suggester.Refresh();

        hits = lookup("ear");
        assertEquals(2, hits.Count);
        assertEquals("a penny saved is a penny <b>ear</b>ned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);
        assertEquals("lend me your <b>ear</b>", hits[1].Key);
        assertEquals(8, hits[1].Value);
        assertEquals(new BytesRef("foobar"), hits[1].Payload);

        hits = lookup("ear ");
        assertEquals(1, hits.Count);
        assertEquals("lend me your <b>ear</b>", hits[0].Key);
        assertEquals(8, hits[0].Value);
        assertEquals(new BytesRef("foobar"), hits[0].Payload);

        hits = lookup("pen");
        assertEquals(1, hits.Count);
        assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);

        hits = lookup("p");
        assertEquals(1, hits.Count);
        assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", hits[0].Key);
        assertEquals(10, hits[0].Value);
        assertEquals(new BytesRef("foobaz"), hits[0].Payload);

        // Change the weight:
        suggester.Update(new BytesRef("lend me your ear"), null, 12, new BytesRef("foobox"));

        // Must refresh to see the updated suggestion:
        suggester.Refresh();

        // The updated entry (weight 12) is now expected ahead of the weight-10 entry.
        hits = lookup("ear");
        assertEquals(2, hits.Count);
        assertEquals("lend me your <b>ear</b>", hits[0].Key);
        assertEquals(12, hits[0].Value);
        assertEquals(new BytesRef("foobox"), hits[0].Payload);
        assertEquals("a penny saved is a penny <b>ear</b>ned", hits[1].Key);
        assertEquals(10, hits[1].Value);
        assertEquals(new BytesRef("foobaz"), hits[1].Payload);
    }
}
/// <summary>
/// Wraps each string in a <c>BytesRef</c> and collects them into a new hash set.
/// </summary>
private ISet<BytesRef> AsSet(params string[] values)
{
    ISet<BytesRef> contexts = new JCG.HashSet<BytesRef>();
    for (int i = 0; i < values.Length; i++)
    {
        contexts.Add(new BytesRef(values[i]));
    }
    return contexts;
}
// LUCENE-5528
/// <summary>
/// Checks context filtering: lookups without contexts, and with the context sets
/// {foo}, {bar}, {baz} and {foo, bar}. The checks run twice over the same directory,
/// first on a freshly built suggester and then on a newly opened one.
/// </summary>
[Test]
public void TestBasicContext()
{
    var inputs = new Input[]
    {
        new Input("lend me your ear", 8, new BytesRef("foobar"), AsSet("foo", "bar")),
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), AsSet("foo", "baz"))
    };
    DirectoryInfo indexDir = CreateTempDir("analyzingInfixContext");

    // Full set of expectations for the "a penny saved..." suggestion (contexts foo, baz).
    Action<Lookup.LookupResult> assertIsPennyEarned = hit =>
    {
        assertEquals("a penny saved is a penny <b>ear</b>ned", hit.Key);
        assertEquals(10, hit.Value);
        assertEquals(new BytesRef("foobaz"), hit.Payload);
        assertNotNull(hit.Contexts);
        assertEquals(2, hit.Contexts.Count());
        assertTrue(hit.Contexts.Contains(new BytesRef("foo")));
        assertTrue(hit.Contexts.Contains(new BytesRef("baz")));
    };

    // Full set of expectations for the "lend me your ear" suggestion (contexts foo, bar).
    Action<Lookup.LookupResult> assertIsLendEar = hit =>
    {
        assertEquals("lend me your <b>ear</b>", hit.Key);
        assertEquals(8, hit.Value);
        assertEquals(new BytesRef("foobar"), hit.Payload);
        assertNotNull(hit.Contexts);
        assertEquals(2, hit.Contexts.Count());
        assertTrue(hit.Contexts.Contains(new BytesRef("foo")));
        assertTrue(hit.Contexts.Contains(new BytesRef("bar")));
    };

    for (int pass = 0; pass < 2; pass++)
    {
        AnalyzingInfixSuggester suggester = null;
        try
        {
            Analyzer analyzer = new MockAnalyzer(Random, MockTokenizer.WHITESPACE, false);
            if (pass == 0)
            {
                suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(indexDir), analyzer, analyzer, 3);
                suggester.Build(new InputArrayEnumerator(inputs));
            }
            else
            {
                // Test again, after close/reopen:
                suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, NewFSDirectory(indexDir), analyzer, analyzer, 3);
            }

            // No context provided, all results returned
            IList<Lookup.LookupResult> hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random).ToString(), 10, true, true);
            assertEquals(2, hits.Count);
            assertIsPennyEarned(hits[0]);
            assertIsLendEar(hits[1]);

            // Both suggestions have "foo" context:
            hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random).ToString(), AsSet("foo"), 10, true, true);
            assertEquals(2, hits.Count);
            assertIsPennyEarned(hits[0]);
            assertIsLendEar(hits[1]);

            // Only one has "bar" context:
            hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random).ToString(), AsSet("bar"), 10, true, true);
            assertEquals(1, hits.Count);
            assertIsLendEar(hits[0]);

            // Only one has "baz" context:
            hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random).ToString(), AsSet("baz"), 10, true, true);
            assertEquals(1, hits.Count);
            assertIsPennyEarned(hits[0]);

            // Both have foo or bar:
            hits = suggester.DoLookup(TestUtil.StringToCharSequence("ear", Random).ToString(), AsSet("foo", "bar"), 10, true, true);
            assertEquals(2, hits.Count);
            assertIsPennyEarned(hits[0]);
            assertIsLendEar(hits[1]);
        }
        finally
        {
            if (suggester != null)
            {
                suggester.Dispose();
            }
        }
    }
}
}
}