// blob: 4590e112a0d509f29b395f123fb0bf5f05a0a472 [file] [log] [blame]
using J2N.Collections;
using J2N.Text;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Console = Lucene.Net.Support.SystemConsole;
namespace Lucene.Net.Search.Suggest.Fst
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Unit tests for <see cref="FSTCompletion"/> (built through <see cref="FSTCompletionBuilder"/>)
/// and for <see cref="FSTCompletionLookup"/>.
/// </summary>
public class FSTCompletionTest : LuceneTestCase
{
    /// <summary>
    /// Matches a "/" followed by one or more digits or dots, i.e. the score suffix of
    /// strings such as "two/1.0". Used by <see cref="StripScore"/>.
    /// </summary>
    private static readonly Regex ScoreSuffix = new Regex("\\/[0-9\\.]+");

    /// <summary>
    /// Shorthand for creating an <see cref="Input"/> from a term and an integer value.
    /// </summary>
    /// <param name="t">The term text.</param>
    /// <param name="v">The value passed as the second constructor argument of <see cref="Input"/>.</param>
    /// <returns>A new <see cref="Input"/>.</returns>
    public static Input Tf(string t, int v)
    {
        return new Input(t, v);
    }

    /// <summary>Completion built in <see cref="SetUp"/> from <see cref="EvalKeys"/>.</summary>
    private FSTCompletion completion;

    /// <summary>
    /// A second <see cref="FSTCompletion"/> over the same FST as <see cref="completion"/>,
    /// constructed with the flags <c>(false, true)</c>.
    /// </summary>
    private FSTCompletion completionAlphabetical;

    /// <summary>
    /// Runs the base set-up, then feeds every entry of <see cref="EvalKeys"/> into a fresh
    /// <see cref="FSTCompletionBuilder"/> and stores the resulting completions in the fixture fields.
    /// </summary>
    public override void SetUp()
    {
        base.SetUp();

        FSTCompletionBuilder builder = new FSTCompletionBuilder();
        foreach (Input tf in EvalKeys())
        {
            builder.Add(tf.term, (int)tf.v);
        }

        completion = builder.Build();
        // Reuse the automaton that was just built; only the constructor flags differ.
        completionAlphabetical = new FSTCompletion(completion.FST, false, true);
    }

    /// <summary>
    /// Returns the fixed set of term/value pairs that the fixture completions are built from.
    /// </summary>
    private Input[] EvalKeys()
    {
        return new Input[] {
            Tf("one", 0),
            Tf("oneness", 1),
            Tf("onerous", 1),
            Tf("onesimus", 1),
            Tf("two", 1),
            Tf("twofold", 1),
            Tf("twonk", 1),
            Tf("thrive", 1),
            Tf("through", 1),
            Tf("threat", 1),
            Tf("three", 1),
            Tf("foundation", 1),
            Tf("fourblah", 1),
            Tf("fourteen", 1),
            Tf("four", 0),
            Tf("fourier", 0),
            Tf("fourty", 0),
            Tf("xo", 1),
        };
    }

    /// <summary>Looking up "two" with a limit of one returns "two".</summary>
    [Test]
    public void TestExactMatchHighPriority()
    {
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("two").ToString(), 1),
            "two/1.0");
    }

    /// <summary>Looking up "one" with a limit of two returns "one" first, then "oneness".</summary>
    [Test]
    public void TestExactMatchLowPriority()
    {
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("one").ToString(), 2),
            "one/0.0",
            "oneness/1.0");
    }

    /// <summary>Looking up "four" with a limit of four returns the exact match first.</summary>
    [Test]
    public void TestExactMatchReordering()
    {
        // Check reordering of exact matches.
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("four").ToString(), 4),
            "four/0.0",
            "fourblah/1.0",
            "fourteen/1.0",
            "fourier/0.0");
    }

    /// <summary>
    /// Checks result ordering under different requested counts and for the differently
    /// configured <see cref="FSTCompletion"/> instances.
    /// </summary>
    [Test]
    public void TestRequestedCount()
    {
        // 'one' is promoted after collecting two higher ranking results.
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("one").ToString(), 2),
            "one/0.0",
            "oneness/1.0");

        // 'four' is collected in a bucket and then again as an exact match.
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("four").ToString(), 2),
            "four/0.0",
            "fourblah/1.0");

        // Check reordering of exact matches.
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("four").ToString(), 4),
            "four/0.0",
            "fourblah/1.0",
            "fourteen/1.0",
            "fourier/0.0");

        // 'one' is at the top after collecting all alphabetical results.
        AssertMatchEquals(completionAlphabetical.DoLookup(StringToCharSequence("one").ToString(), 2),
            "one/0.0",
            "oneness/1.0");

        // 'one' is not promoted after collecting two higher ranking results.
        FSTCompletion noPromotion = new FSTCompletion(completion.FST, true, false);
        AssertMatchEquals(noPromotion.DoLookup(StringToCharSequence("one").ToString(), 2),
            "oneness/1.0",
            "onerous/1.0");

        // 'one' is at the top after collecting all alphabetical results.
        AssertMatchEquals(completionAlphabetical.DoLookup(StringToCharSequence("one").ToString(), 2),
            "one/0.0",
            "oneness/1.0");
    }

    /// <summary>A prefix that matches no key ("xyz") yields no completions.</summary>
    [Test]
    public void TestMiss()
    {
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("xyz").ToString(), 1));
    }

    /// <summary>A prefix that matches no key yields an empty result from the alphabetical instance.</summary>
    [Test]
    public void TestAlphabeticWithWeights()
    {
        assertEquals(0, completionAlphabetical.DoLookup(StringToCharSequence("xyz").ToString(), 1).size());
    }

    /// <summary>With an effectively unlimited count, all four "one*" keys are returned.</summary>
    [Test]
    public void TestFullMatchList()
    {
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("one").ToString(), int.MaxValue),
            "oneness/1.0",
            "onerous/1.0",
            "onesimus/1.0",
            "one/0.0");
    }

    /// <summary>
    /// A key consisting of a single multi-byte UTF-8 sequence can be added and looked up again.
    /// </summary>
    [Test]
    public void TestThreeByte()
    {
        // Despite the test name, F0 A4 AD A2 is a four-byte UTF-8 sequence (code point U+24B62).
        string key = Encoding.UTF8.GetString(new byte[] { 0xF0, 0xA4, 0xAD, 0xA2 });

        FSTCompletionBuilder builder = new FSTCompletionBuilder();
        builder.Add(new BytesRef(key), 0);
        FSTCompletion lookup = builder.Build();

        IEnumerable<FSTCompletion.Completion> result = lookup.DoLookup(StringToCharSequence(key).ToString(), 1);
        assertEquals(1, result.Count());
    }

    /// <summary>
    /// Builds a lookup from 5000 random keys that all carry the same weight (-1) and checks
    /// that <c>Get</c> reports the same value for every key.
    /// </summary>
    [Test]
    public void TestLargeInputConstantWeights()
    {
        FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);

        Random r = Random;
        List<Input> keys = new List<Input>();
        for (int i = 0; i < 5000; i++)
        {
            keys.Add(new Input(TestUtil.RandomSimpleString(r), -1));
        }

        lookup.Build(new InputArrayIterator(keys));

        // All the weights were constant, so all returned buckets must be constant, whatever they
        // are.
        long? previous = null;
        foreach (Input tf in keys)
        {
            long? current = Convert.ToInt64(lookup.Get(TestUtil.BytesToCharSequence(tf.term, Random).ToString()));
            if (previous != null)
            {
                assertEquals(previous, current);
            }
            previous = current;
        }
    }

    /// <summary>
    /// Builds a lookup from the input returned by <c>LookupBenchmarkTest.ReadTop50KWiki()</c>,
    /// checks that every term can be found again, and checks the top results for the prefix "wit".
    /// </summary>
    [Test]
    public void TestMultilingualInput()
    {
        IList<Input> input = LookupBenchmarkTest.ReadTop50KWiki();

        FSTCompletionLookup lookup = new FSTCompletionLookup();
        lookup.Build(new InputArrayIterator(input));
        assertEquals(input.size(), lookup.Count);

        foreach (Input tf in input)
        {
            assertNotNull("Not found: " + tf.term.toString(), lookup.Get(TestUtil.BytesToCharSequence(tf.term, Random).ToString()));
            assertEquals(tf.term.Utf8ToString(), lookup.DoLookup(TestUtil.BytesToCharSequence(tf.term, Random).ToString(), true, 1)[0].Key.toString());
        }

        IList<Lookup.LookupResult> result = lookup.DoLookup(StringToCharSequence("wit").ToString(), true, 5);
        assertEquals(5, result.size());
        // assertEquals (rather than assertTrue on Equals) so a failure reports the actual key.
        assertEquals("wit", result[0].Key.toString()); // exact match.
        assertEquals("with", result[1].Key.toString()); // highest count.
    }

    /// <summary>A completion built from no input returns nothing for the empty prefix.</summary>
    [Test]
    public void TestEmptyInput()
    {
        completion = new FSTCompletionBuilder().Build();
        AssertMatchEquals(completion.DoLookup(StringToCharSequence("").ToString(), 10));
    }

    /// <summary>
    /// Builds a lookup from random numeric-string keys and checks that every result returned
    /// for each proper, non-empty prefix of each key starts with that prefix.
    /// </summary>
    [Test]
    public void TestRandom()
    {
        List<Input> freqs = new List<Input>();
        Random rnd = Random;

        // Pick the number of entries once, up front, instead of re-rolling the loop bound
        // every time the loop condition is evaluated.
        int entryCount = 2500 + rnd.nextInt(2500);
        for (int i = 0; i < entryCount; i++)
        {
            int weight = rnd.nextInt(100);
            freqs.Add(new Input("" + rnd.Next(), weight));
        }

        FSTCompletionLookup lookup = new FSTCompletionLookup();
        lookup.Build(new InputArrayIterator(freqs.ToArray()));

        foreach (Input tf in freqs)
        {
            string term = tf.term.Utf8ToString();
            // Prefix lengths 1 .. term.Length - 1; the full term itself is not queried.
            for (int i = 1; i < term.Length; i++)
            {
                string prefix = term.Substring(0, i);
                foreach (Lookup.LookupResult lr in lookup.DoLookup(StringToCharSequence(prefix).ToString(), true, 10))
                {
                    assertTrue(lr.Key.toString().StartsWith(prefix, StringComparison.Ordinal));
                }
            }
        }
    }

    /// <summary>
    /// Wraps <paramref name="prefix"/> via <c>TestUtil.StringToCharSequence</c> using the test's random source.
    /// </summary>
    private ICharSequence StringToCharSequence(string prefix)
    {
        return TestUtil.StringToCharSequence(prefix, Random);
    }

    /// <summary>
    /// Fails the test unless the completions in <paramref name="res"/> match
    /// <paramref name="expected"/>, element by element and in order.
    /// </summary>
    /// <remarks>
    /// Both sides are passed through <see cref="StripScore"/> before being compared, so the
    /// "/score" suffixes in <paramref name="expected"/> are not part of the comparison. On a
    /// mismatch a two-column "Expected / Result" table is written to the console and included
    /// in the failure message.
    /// </remarks>
    /// <param name="res">The completions returned by the lookup under test.</param>
    /// <param name="expected">The expected completions, in order.</param>
    private void AssertMatchEquals(IEnumerable<FSTCompletion.Completion> res, params string[] expected)
    {
        // Materialise once: the sequence is only typed as IEnumerable, so repeated
        // Count()/ElementAt() calls could re-enumerate it for every element.
        string[] result = res.Select(c => c.toString()).ToArray();

        if (ArrayEqualityComparer<string>.OneDimensional.Equals(StripScore(expected), StripScore(result)))
        {
            return;
        }

        int colLen = Math.Max(MaxLen(expected), MaxLen(result));
        string format = "{0," + colLen + "} {1," + colLen + "}\n";

        StringBuilder b = new StringBuilder();
        b.AppendFormat(CultureInfo.InvariantCulture, format, "Expected", "Result");

        int rows = Math.Max(result.Length, expected.Length);
        for (int i = 0; i < rows; i++)
        {
            // Pad the shorter column with "--" so both lists are shown in full.
            b.AppendFormat(CultureInfo.InvariantCulture, format,
                i < expected.Length ? expected[i] : "--",
                i < result.Length ? result[i] : "--");
        }

        string table = b.ToString();
        Console.WriteLine(table);
        fail("Expected different output:\n" + table);
    }

    /// <summary>
    /// Returns a copy of <paramref name="expected"/> in which every "/" followed by digits
    /// and/or dots has been removed from each element.
    /// </summary>
    private string[] StripScore(string[] expected)
    {
        string[] result = new string[expected.Length];
        for (int i = 0; i < result.Length; i++)
        {
            result[i] = ScoreSuffix.Replace(expected[i], "");
        }
        return result;
    }

    /// <summary>
    /// Returns the length of the longest string in <paramref name="result"/>, or 0 for an empty array.
    /// </summary>
    private int MaxLen(string[] result)
    {
        int len = 0;
        foreach (string s in result)
        {
            len = Math.Max(len, s.Length);
        }
        return len;
    }
}
}